在 Tensorflow 中校正反卷积层
Correcting Deconvolution Layer in Tensorflow
我正在尝试使用 Tensorflow 构建变分自动编码器。我从最简单的模型开始。我有以下方法:
def conv_layer(x, w_shape, b_shape, padding='SAME'):
    """Convolution followed by ReLU, with weight/bias/activation summaries.

    Args:
        x: input tensor in NHWC layout.
        w_shape: filter shape [kh, kw, in_channels, out_channels].
        b_shape: bias shape [out_channels].
        padding: padding mode passed to tf.nn.conv2d.

    Returns:
        The ReLU-activated convolution output.
    """
    weights = weight_variable(w_shape)
    tf.summary.histogram(weights.name, weights)
    bias = bias_variable(b_shape)
    tf.summary.histogram(bias.name, bias)
    # NOTE(review): the original comment claimed a stride of 2 (to replace
    # max pooling), but the strides below are [1, 1, 1, 1] — the spatial
    # size is unchanged by this layer as written.
    activations = tf.nn.relu(
        tf.nn.conv2d(x, weights, strides=[1, 1, 1, 1], padding=padding) + bias)
    tf.summary.histogram(activations.name, activations)
    return activations
def deconv_layer(x, w_shape, b_shape, padding="SAME"):
    """Transposed convolution (no activation), with summaries.

    Args:
        x: input tensor in NHWC layout.
        w_shape: filter shape [kh, kw, out_channels, in_channels]
            (conv2d_transpose expects output channels in position 2).
        b_shape: bias shape [out_channels].
        padding: padding mode passed to tf.nn.conv2d_transpose.

    Returns:
        The biased transposed-convolution output (linear, no activation).
    """
    weights = weight_variable(w_shape)
    tf.summary.histogram(weights.name, weights)
    bias = bias_variable(b_shape)
    tf.summary.histogram('bias', bias)
    in_shape = tf.shape(x)
    # Output keeps the input's spatial size because the stride below is 1.
    out_shape = tf.stack([in_shape[0], in_shape[1], in_shape[2], w_shape[2]])
    # NOTE(review): the original comment claimed a stride of 2, but the
    # strides below are [1, 1, 1, 1].
    transposed_activations = tf.nn.conv2d_transpose(
        x, weights, out_shape, [1, 1, 1, 1], padding=padding) + bias
    tf.summary.histogram(transposed_activations.name, transposed_activations)
    return transposed_activations
而整个网络的模型如下:
# Encoder: conv stack growing channels 3 -> 32 -> 64 -> 128 -> 256.
with tf.name_scope('conv1'):
    conv1 = conv_layer(image, [3, 3, 3, 32], [32])
with tf.name_scope('conv2'):
    conv2 = conv_layer(conv1, [3, 3, 32, 64], [64])
with tf.name_scope('conv3'):
    conv3 = conv_layer(conv2, [3, 3, 64, 128], [128])
with tf.name_scope('conv4'):
    conv4 = conv_layer(conv3, [3, 3, 128, 256], [256])
# Latent representation z (an extra conv layer at 256 channels).
with tf.name_scope('z'):
    z = conv_layer(conv4, [3, 3, 256, 256], [256])
# Decoder: mirror image of the encoder, shrinking channels back to 3.
with tf.name_scope('deconv4'):
    deconv4 = deconv_layer(z, [3, 3, 128, 256], [128])
with tf.name_scope('deconv3'):
    deconv3 = deconv_layer(deconv4, [3, 3, 64, 128], [64])
with tf.name_scope('deconv2'):
    deconv2 = deconv_layer(deconv3, [3, 3, 32, 64], [32])
with tf.name_scope('deconv1'):
    deconv_image = deconv_layer(deconv2, [3, 3, 3, 32], [3])
我从 FIFOQueue
获取我的图像并将它们输入到这个模型中。我的图片大小是 112, 112, 3
。我的问题是:当我在 conv 和 deconv 层中把步幅从 [1, 1, 1, 1] 改为 [1, 2, 2, 1] 时,我得到以下错误:
InvalidArgumentError (see above for traceback): Conv2DSlowBackpropInput: Size of out_backprop doesn't match computed: actual = 4, computed = 2
[[Node: deconv4/conv2d_transpose = Conv2DBackpropInput[T=DT_FLOAT, data_format="NHWC", padding="SAME", strides=[1, 2, 2, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/gpu:0"](deconv4/stack, deconv4/Variable/read, z/Relu)]]
[[Node: deconv1/add/_17 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_85_deconv1/add", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
P.S:我知道我在 deconv 层缺少激活函数,但我想这与我得到的错误无关。
非常感谢任何帮助!!
此错误的原因如下:
如果我们假设每个转换层都有 stride = 2
,那么在我的例子中,当输入图像的大小为 112, 112, 3
时,在每个 conv layer 之后,frames(或 feature maps)的大小在应用卷积后都会减少一半。也就是说,在 conv1
之后,图像的大小(高度、宽度)变为 [56, 56]
。 conv2
后,大小变为 [28, 28]
。 conv3
之后:[14, 14]
,conv4
之后:[7, 7]
。因此,应用一个名为 z
的额外 conv layer
已将维度减少到 [3, 3]
。问题来了:7 不能被 2 整除,所以我们得到了不一致的维度。因此,在依次应用各个 deconv layer 之后,从 [3, 3]
恢复到 [112, 112]
是不可能的。此外:
[3, 3] -> [6, 6] -> [12, 12] -> [24, 24] -> [48, 48]
第二个错误:deconv layer
中的输出形状应该是这样的:
# we should multiply x_shape[1] and x_shape[2] by 2.
out_shape = tf.stack([x_shape[0], x_shape[1] * 2, x_shape[2] * 2, w_shape[2]])
因此,最后的deconv layer
就变成了这样:
def deconv_layer(x, w_shape, b_shape, is_training, padding="SAME", activation='selu'):
    """Stride-2 transposed convolution that doubles the spatial size.

    Args:
        x: input tensor in NHWC layout.
        w_shape: filter shape [kh, kw, out_channels, in_channels]
            (conv2d_transpose expects output channels in position 2).
        b_shape: bias shape [out_channels].
        is_training: unused here; kept for interface compatibility.
        padding: padding mode passed to tf.nn.conv2d_transpose.
        activation: 'selu' applies tf.nn.selu; anything else applies sigmoid
            (sigmoid is typically used on the final reconstruction layer).

    Returns:
        Activated output of shape [batch, h * 2, w * 2, out_channels].
    """
    W = weight_variable(w_shape)
    tf.summary.histogram("weights", W)
    b = bias_variable(b_shape)
    tf.summary.histogram('biases', b)
    x_shape = tf.shape(x)
    # Output shape: [batch_size, h * 2, w * 2, output channels from w].
    # Height/width double because the stride below is 2, mirroring the
    # stride-2 conv layers of the encoder.
    out_shape = tf.stack([x_shape[0], x_shape[1] * 2, x_shape[2] * 2, w_shape[2]])
    # Hoisted out of the if/else: both branches performed the identical
    # transposed convolution; only the activation differs.
    conv_trans = tf.nn.conv2d_transpose(x, W, out_shape, [1, 2, 2, 1], padding=padding) + b
    if activation == 'selu':
        # Bug fix: the original applied tf.nn.elu here despite the 'selu'
        # label; use the actual SELU activation.
        transposed_activations = tf.nn.selu(conv_trans)
    else:
        transposed_activations = tf.nn.sigmoid(conv_trans)
    tf.summary.histogram("transpose_activation", transposed_activations)
    return transposed_activations
因此,输出的大小与输入的大小不同,这就是我收到错误的原因。为了做到 back propagation
我们需要一个成本函数。此成本函数将取决于 output
和 input
。因此,如果它们的大小不同,则会导致错误。
解决这个问题的方法是让 conv layer, z,
有一个 stride of 1
。
我正在尝试使用 Tensorflow 构建变分自动编码器。我从最简单的模型开始。我有以下方法:
def conv_layer(x, w_shape, b_shape, padding='SAME'):
    """Build one convolution + ReLU layer and log histograms to summaries.

    Args:
        x: NHWC input tensor.
        w_shape: [kh, kw, in_channels, out_channels] filter shape.
        b_shape: [out_channels] bias shape.
        padding: conv2d padding mode.

    Returns:
        ReLU activations of the convolution plus bias.
    """
    kernel = weight_variable(w_shape)
    tf.summary.histogram(kernel.name, kernel)
    offset = bias_variable(b_shape)
    tf.summary.histogram(offset.name, offset)
    # NOTE(review): despite the original "stride of 2" remark, the strides
    # used here are [1, 1, 1, 1]; spatial dimensions are preserved.
    linear = tf.nn.conv2d(x, kernel, strides=[1, 1, 1, 1], padding=padding) + offset
    activations = tf.nn.relu(linear)
    tf.summary.histogram(activations.name, activations)
    return activations
def deconv_layer(x, w_shape, b_shape, padding="SAME"):
    """Build one transposed-convolution layer (linear output, no activation).

    Args:
        x: NHWC input tensor.
        w_shape: [kh, kw, out_channels, in_channels] filter shape
            (note the output-channel position for conv2d_transpose).
        b_shape: [out_channels] bias shape.
        padding: conv2d_transpose padding mode.

    Returns:
        The transposed convolution plus bias, without an activation.
    """
    kernel = weight_variable(w_shape)
    tf.summary.histogram(kernel.name, kernel)
    offset = bias_variable(b_shape)
    tf.summary.histogram('bias', offset)
    shape_in = tf.shape(x)
    # Spatial size is unchanged: stride 1 below.
    out_shape = tf.stack([shape_in[0], shape_in[1], shape_in[2], w_shape[2]])
    # NOTE(review): original remark mentioned stride 2; actual strides are 1.
    transposed_activations = (
        tf.nn.conv2d_transpose(x, kernel, out_shape, [1, 1, 1, 1], padding=padding)
        + offset)
    tf.summary.histogram(transposed_activations.name, transposed_activations)
    return transposed_activations
而整个网络的模型如下:
# Encoder path: four conv layers plus the latent layer z.
with tf.name_scope('conv1'):
    conv1 = conv_layer(image, [3, 3, 3, 32], [32])
with tf.name_scope('conv2'):
    conv2 = conv_layer(conv1, [3, 3, 32, 64], [64])
with tf.name_scope('conv3'):
    conv3 = conv_layer(conv2, [3, 3, 64, 128], [128])
with tf.name_scope('conv4'):
    conv4 = conv_layer(conv3, [3, 3, 128, 256], [256])
with tf.name_scope('z'):
    z = conv_layer(conv4, [3, 3, 256, 256], [256])
# Decoder path: deconv layers mirroring the encoder channel counts.
with tf.name_scope('deconv4'):
    deconv4 = deconv_layer(z, [3, 3, 128, 256], [128])
with tf.name_scope('deconv3'):
    deconv3 = deconv_layer(deconv4, [3, 3, 64, 128], [64])
with tf.name_scope('deconv2'):
    deconv2 = deconv_layer(deconv3, [3, 3, 32, 64], [32])
with tf.name_scope('deconv1'):
    deconv_image = deconv_layer(deconv2, [3, 3, 3, 32], [3])
我从 FIFOQueue
获取我的图像并将它们输入到这个模型中。我的图片大小是 112, 112, 3
。我的问题是:当我在 conv 和 deconv 层中把步幅从 [1, 1, 1, 1] 改为 [1, 2, 2, 1] 时,我得到以下错误:
InvalidArgumentError (see above for traceback): Conv2DSlowBackpropInput: Size of out_backprop doesn't match computed: actual = 4, computed = 2
[[Node: deconv4/conv2d_transpose = Conv2DBackpropInput[T=DT_FLOAT, data_format="NHWC", padding="SAME", strides=[1, 2, 2, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/gpu:0"](deconv4/stack, deconv4/Variable/read, z/Relu)]]
[[Node: deconv1/add/_17 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_85_deconv1/add", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
P.S:我知道我在 deconv 层缺少激活函数,但我想这与我得到的错误无关。 非常感谢任何帮助!!
此错误的原因如下:
如果我们假设每个转换层都有 stride = 2
,那么在我的例子中,当输入图像的大小为 112, 112, 3
时,在每个 conv layer 之后,frames(或 feature maps)的大小在应用卷积后都会减少一半。也就是说,在 conv1
之后,图像的大小(高度、宽度)变为 [56, 56]
。 conv2
后,大小变为 [28, 28]
。 conv3
之后:[14, 14]
,conv4
之后:[7, 7]
。因此,应用一个名为 z
的额外 conv layer
已将维度减少到 [3, 3]
。问题来了:7 不能被 2 整除,所以我们得到了不一致的维度。因此,在依次应用各个 deconv layer 之后,从 [3, 3]
恢复到 [112, 112]
是不可能的。此外:
[3, 3] -> [6, 6] -> [12, 12] -> [24, 24] -> [48, 48]
第二个错误:deconv layer
中的输出形状应该是这样的:
# we should multiply x_shape[1] and x_shape[2] by 2.
out_shape = tf.stack([x_shape[0], x_shape[1] * 2, x_shape[2] * 2, w_shape[2]])
因此,最后的deconv layer
就变成了这样:
def deconv_layer(x, w_shape, b_shape, is_training, padding="SAME", activation='selu'):
    """Stride-2 transposed convolution that doubles height and width.

    Args:
        x: NHWC input tensor.
        w_shape: [kh, kw, out_channels, in_channels] filter shape
            (conv2d_transpose expects output channels in position 2).
        b_shape: [out_channels] bias shape.
        is_training: unused here; kept for interface compatibility.
        padding: conv2d_transpose padding mode.
        activation: 'selu' applies tf.nn.selu; any other value applies
            sigmoid (typically the final reconstruction layer).

    Returns:
        Activated output of shape [batch, h * 2, w * 2, out_channels].
    """
    W = weight_variable(w_shape)
    tf.summary.histogram("weights", W)
    b = bias_variable(b_shape)
    tf.summary.histogram('biases', b)
    x_shape = tf.shape(x)
    # output shape: [batch_size, h * 2, w * 2, output channels from w];
    # the doubling matches the stride of 2 used below (mirroring the
    # stride-2 conv layers of the encoder).
    out_shape = tf.stack([x_shape[0], x_shape[1] * 2, x_shape[2] * 2, w_shape[2]])
    # Hoisted: both branches ran the identical transposed convolution and
    # only differed in the activation applied afterwards.
    conv_trans = tf.nn.conv2d_transpose(x, W, out_shape, [1, 2, 2, 1], padding=padding) + b
    if activation == 'selu':
        # Bug fix: the original used tf.nn.elu despite the 'selu' label.
        transposed_activations = tf.nn.selu(conv_trans)
    else:
        transposed_activations = tf.nn.sigmoid(conv_trans)
    tf.summary.histogram("transpose_activation", transposed_activations)
    return transposed_activations
因此,输出的大小与输入的大小不同,这就是我收到错误的原因。为了做到 back propagation
我们需要一个成本函数。此成本函数将取决于 output
和 input
。因此,如果它们的大小不同,则会导致错误。
解决这个问题的方法是让 conv layer, z,
有一个 stride of 1
。