Tensorflow中单应性的实现
Implementation of homography in Tensorflow
我正在尝试在 TensorFlow 中实现一个学习预测单应性(homography,见 Wiki)的网络。我的网络输出一个 4 维向量,用于在水平和垂直方向上平移 2 张图像。然后,我对这些变换后的图像做中心裁剪(tf.image.central_crop),并与真值(ground truth)比较来计算误差。我尝试实现了它,其中平移部分使用了 tf.contrib.image.translate。但是,梯度没有流向网络的变量。我该如何解决这个问题?这是我得到的错误:
ValueError: No gradients provided for any variable, check your graph for ops that do not support gradients, between variables ["<tf.Variable 'get_tr/w1:0' shape=(3, 3, 6, 64) dtype=float32_ref>", "<tf.Variable 'get_tr/b1:0' shape=(20, 298, 298, 64) dtype=float32_ref>", "<tf.Variable 'get_tr/w2:0' shape=(3, 3, 64, 64) dtype=float32_ref>", "<tf.Variable 'get_tr/b2:0' shape=(20, 296, 296, 64) dtype=float32_ref>", "<tf.Variable 'get_tr/w3:0' shape=(3, 3, 64, 128) dtype=float32_ref>", "<tf.Variable 'get_tr/b3:0' shape=(20, 147, 147, 128) dtype=float32_ref>", "<tf.Variable 'get_tr/w4:0' shape=(3, 3, 128, 128) dtype=float32_ref>", "<tf.Variable 'get_tr/b4:0' shape=(20, 73, 73, 128) dtype=float32_ref>", "<tf.Variable 'get_tr/w5:0' shape=(5, 5, 128, 128) dtype=float32_ref>", "<tf.Variable 'get_tr/fc1/kernel:0' shape=(609408, 512) dtype=float32_ref>", "<tf.Variable 'get_tr/fc1/bias:0' shape=(512,) dtype=float32_ref>", "<tf.Variable 'get_tr/fc2/kernel:0' shape=(512, 1024) dtype=float32_ref>", "<tf.Variable 'get_tr/fc2/bias:0' shape=(1024,) dtype=float32_ref>", "<tf.Variable 'get_tr/fc_o/kernel:0' shape=(1024, 4) dtype=float32_ref>", "<tf.Variable 'get_tr/fc_o/bias:0' shape=(4,) dtype=float32_ref>"] and loss Tensor("mean_squared_error/value:0", shape=(), dtype=float32).
下面是获取这个变换向量的代码。
def get_transform_vectors(self):
    """Build the network that predicts the translation vectors.

    ``self.img_train_1`` and ``self.img_train_2`` are concatenated along
    axis 3 and passed through five VALID-padded convolutions (the first
    four followed by a bias and a ReLU), then flattened and fed through
    three dense layers.

    Each bias holds one value per output channel. Sizing the bias from
    ``tf.shape(conv)`` instead would bake the batch size and the spatial
    size of the activations into the variable.

    Returns:
        The output of the last dense layer (``fc_o``): 4 values per
        example, no activation.
    """
    # Start of transformation prediction network
    image = tf.concat((self.img_train_1, self.img_train_2), 3)

    def conv(inputs, kernel_shape, stride, index):
        # VALID convolution with a truncated-normal kernel named w<index>.
        kernel = tf.Variable(
            tf.truncated_normal(shape=kernel_shape, stddev=0.1),
            name='w%d' % index)
        return tf.nn.conv2d(inputs,
                            filter=kernel,
                            strides=(1, stride, stride, 1),
                            padding="VALID",
                            name='conv%d' % index)

    # (kernel shape, spatial stride) of the stages followed by bias + ReLU.
    hidden_stages = [
        ([3, 3, 6, 64], 1),
        ([3, 3, 64, 64], 1),
        ([3, 3, 64, 128], 2),
        ([3, 3, 128, 128], 2),
    ]

    with tf.variable_scope('get_tr'):
        net = image
        for index, (kernel_shape, stride) in enumerate(hidden_stages, start=1):
            net = conv(net, kernel_shape, stride, index)
            # One bias per output channel; broadcasts over the other axes.
            bias = tf.Variable(
                tf.truncated_normal(shape=[kernel_shape[-1]], stddev=0.1),
                name='b%d' % index)
            net = tf.nn.relu(net + bias, name='conv%d_' % index)

        # The last convolution has neither bias nor activation.
        conv5 = conv(net, [5, 5, 128, 128], 1, 5)
        conv5_ = tf.contrib.layers.flatten(conv5)
        fc1 = tf.layers.dense(conv5_, 512, activation=tf.nn.relu, name='fc1')
        fc2 = tf.layers.dense(fc1, 1024, activation=tf.nn.relu, name='fc2')
        fc_o = tf.layers.dense(fc2, 4, name='fc_o')
    return fc_o
下面是平移图像并计算损失的代码。
# Columns 0:2 of the predicted vector shift the first image and
# columns 2:4 shift the second one.
# NOTE(review): the accompanying error report says no gradient reaches the
# network variables from this loss; the shift argument of
# tf.contrib.image.translate is presumably not differentiable -- confirm.
shift_1 = tf.cast(
    tf.reshape(self.tr_vector[:, 0:2], [self.batch_size, 2]),
    dtype=tf.float32)
shifted_1 = tf.contrib.image.translate(self.img_train_1, shift_1)
shift_2 = tf.cast(
    tf.reshape(self.tr_vector[:, 2:4], [self.batch_size, 2]),
    dtype=tf.float32)
shifted_2 = tf.contrib.image.translate(self.img_train_2, shift_2)

# The output image is the sum of the two shifted images.
self.img_o = shifted_1 + shifted_2
self.loss = tf.losses.mean_squared_error(self.img_o, self.img_label)
self.optim = tf.train.AdamOptimizer().minimize(self.loss)
由于您只关心对平移量的优化,我编写了下面这个函数来执行平移,并使用 @tf.custom_gradient 为平移向量定义梯度:
import tensorflow as tf
@tf.custom_gradient
def my_img_translate(imgs, translates):
    """Translate images and provide a gradient for the translation vectors.

    Args:
        imgs: Batch of images. Axis 1 is differenced for the y gradient
            and axis 2 for the x gradient, and the result is reduced over
            axes (1, 2, 3), so a rank-4 batch is assumed.
        translates: One shift per image; column 0 is used as the x shift
            and column 1 as the y shift.

    Returns:
        The translated images and the gradient function. The gradient
        function returns ``None`` for ``imgs`` and a finite-difference
        estimate for ``translates``.
    """
    # Interpolation mode has to be fixed due to limitations of tf.custom_gradient
    interpolation = 'NEAREST'
    imgs_translated = tf.contrib.image.translate(
        imgs, translates, interpolation=interpolation)

    def grad(img_translated_grads):
        # The output is img(x - tx, y - ty), so its derivative with respect
        # to tx (or ty) is the negated spatial derivative of the image
        # sampled at the same shifted position. Both derivative images are
        # therefore shifted by the full translation, not by one component.

        # X gradients: negated central differences, one-sided at the borders
        imgs_x_grad = tf.concat([
            imgs[:, :, :1] - imgs[:, :, 1:2],
            (imgs[:, :, :-2] - imgs[:, :, 2:]) / 2,
            imgs[:, :, -2:-1] - imgs[:, :, -1:],
        ], axis=2)
        imgs_x_grad_translated = tf.contrib.image.translate(
            imgs_x_grad, translates, interpolation=interpolation)
        translates_x_grad = tf.reduce_sum(
            img_translated_grads * imgs_x_grad_translated, axis=(1, 2, 3))

        # Y gradients: same scheme along axis 1
        imgs_y_grad = tf.concat([
            imgs[:, :1] - imgs[:, 1:2],
            (imgs[:, :-2] - imgs[:, 2:]) / 2,
            imgs[:, -2:-1] - imgs[:, -1:],
        ], axis=1)
        imgs_y_grad_translated = tf.contrib.image.translate(
            imgs_y_grad, translates, interpolation=interpolation)
        translates_y_grad = tf.reduce_sum(
            img_translated_grads * imgs_y_grad_translated, axis=(1, 2, 3))

        # Complete gradient: no gradient for the images, one (x, y) pair per image
        translates_grad = tf.stack([translates_x_grad, translates_y_grad], axis=1)
        return None, translates_grad

    return imgs_translated, grad
请注意,在这种情况下我没有为图像返回任何梯度,因为这些图像不会被优化(但如果需要,原则上可以使用平移操作的内置梯度)。
我用一个简单的用例对此进行了测试:平移图像,使其中心具有最高值:
import tensorflow as tf
import numpy as np
@tf.custom_gradient
def my_img_translate(imgs, translates):
    """Translate images and provide a gradient for the translation vectors.

    Args:
        imgs: Batch of images. Axis 1 is differenced for the y gradient
            and axis 2 for the x gradient, and the result is reduced over
            axes (1, 2, 3), so a rank-4 batch is assumed.
        translates: One shift per image; column 0 is used as the x shift
            and column 1 as the y shift.

    Returns:
        The translated images and the gradient function. The gradient
        function returns ``None`` for ``imgs`` and a finite-difference
        estimate for ``translates``.
    """
    # Interpolation mode has to be fixed due to limitations of tf.custom_gradient
    interpolation = 'NEAREST'
    imgs_translated = tf.contrib.image.translate(
        imgs, translates, interpolation=interpolation)

    def grad(img_translated_grads):
        # The output is img(x - tx, y - ty), so its derivative with respect
        # to tx (or ty) is the negated spatial derivative of the image
        # sampled at the same shifted position. Both derivative images are
        # therefore shifted by the full translation, not by one component.

        # X gradients: negated central differences, one-sided at the borders
        imgs_x_grad = tf.concat([
            imgs[:, :, :1] - imgs[:, :, 1:2],
            (imgs[:, :, :-2] - imgs[:, :, 2:]) / 2,
            imgs[:, :, -2:-1] - imgs[:, :, -1:],
        ], axis=2)
        imgs_x_grad_translated = tf.contrib.image.translate(
            imgs_x_grad, translates, interpolation=interpolation)
        translates_x_grad = tf.reduce_sum(
            img_translated_grads * imgs_x_grad_translated, axis=(1, 2, 3))

        # Y gradients: same scheme along axis 1
        imgs_y_grad = tf.concat([
            imgs[:, :1] - imgs[:, 1:2],
            (imgs[:, :-2] - imgs[:, 2:]) / 2,
            imgs[:, -2:-1] - imgs[:, -1:],
        ], axis=1)
        imgs_y_grad_translated = tf.contrib.image.translate(
            imgs_y_grad, translates, interpolation=interpolation)
        translates_y_grad = tf.reduce_sum(
            img_translated_grads * imgs_y_grad_translated, axis=(1, 2, 3))

        # Complete gradient: no gradient for the images, one (x, y) pair per image
        translates_grad = tf.stack([translates_x_grad, translates_y_grad], axis=1)
        return None, translates_grad

    return imgs_translated, grad
# Test operations
# The plotting code at the end uses plt, which the snippet never imported.
import matplotlib.pyplot as plt

imgs = tf.placeholder(tf.float32, [None, None, None, None])
# A single (x, y) shift shared by every image in the batch.
translates = tf.Variable([0, 0], dtype=tf.float32)
translates_tiled = tf.tile(translates[tf.newaxis], (tf.shape(imgs)[0], 1))
imgs_translated = my_img_translate(imgs, translates_tiled)
imgs_midpoint = imgs_translated[:, tf.shape(imgs_translated)[1] // 2, tf.shape(imgs_translated)[2] // 2]
# Minimizing the negated square maximizes the value at the image midpoint.
loss = -tf.reduce_sum(tf.square(imgs_midpoint))
train_op = tf.train.GradientDescentOptimizer(10).minimize(loss)
init_op = tf.global_variables_initializer()
with tf.Session() as sess:
    # Make test image: its maximum is where xs and ys are 0, which is not
    # the middle of either linspace range.
    xs, ys = np.meshgrid(np.linspace(-2, 1, 100), np.linspace(-1, 2, 75))
    test_img = (1 / (1 + np.square(xs))) * (1 / (1 + np.square(ys)))
    test_img /= np.max(test_img)
    test_img_batch = test_img[np.newaxis, :, :, np.newaxis]
    # Train
    sess.run(init_op)
    for _ in range(100):
        sess.run(train_op, feed_dict={imgs: test_img_batch})
    # Show result
    test_img_result = sess.run(imgs_translated, feed_dict={imgs: test_img_batch})[0, :, :, 0]
    plt.figure()
    plt.subplot(121)
    plt.imshow(test_img)
    plt.subplot(122)
    plt.imshow(test_img_result)
结果:
我正在尝试在 TensorFlow 中实现一个学习预测单应性(homography,见 Wiki)的网络。我的网络输出一个 4 维向量,用于在水平和垂直方向上平移 2 张图像。然后,我对这些变换后的图像做中心裁剪(tf.image.central_crop),并与真值(ground truth)比较来计算误差。我尝试实现了它,其中平移部分使用了 tf.contrib.image.translate。但是,梯度没有流向网络的变量。我该如何解决这个问题?这是我得到的错误:
ValueError: No gradients provided for any variable, check your graph for ops that do not support gradients, between variables ["<tf.Variable 'get_tr/w1:0' shape=(3, 3, 6, 64) dtype=float32_ref>", "<tf.Variable 'get_tr/b1:0' shape=(20, 298, 298, 64) dtype=float32_ref>", "<tf.Variable 'get_tr/w2:0' shape=(3, 3, 64, 64) dtype=float32_ref>", "<tf.Variable 'get_tr/b2:0' shape=(20, 296, 296, 64) dtype=float32_ref>", "<tf.Variable 'get_tr/w3:0' shape=(3, 3, 64, 128) dtype=float32_ref>", "<tf.Variable 'get_tr/b3:0' shape=(20, 147, 147, 128) dtype=float32_ref>", "<tf.Variable 'get_tr/w4:0' shape=(3, 3, 128, 128) dtype=float32_ref>", "<tf.Variable 'get_tr/b4:0' shape=(20, 73, 73, 128) dtype=float32_ref>", "<tf.Variable 'get_tr/w5:0' shape=(5, 5, 128, 128) dtype=float32_ref>", "<tf.Variable 'get_tr/fc1/kernel:0' shape=(609408, 512) dtype=float32_ref>", "<tf.Variable 'get_tr/fc1/bias:0' shape=(512,) dtype=float32_ref>", "<tf.Variable 'get_tr/fc2/kernel:0' shape=(512, 1024) dtype=float32_ref>", "<tf.Variable 'get_tr/fc2/bias:0' shape=(1024,) dtype=float32_ref>", "<tf.Variable 'get_tr/fc_o/kernel:0' shape=(1024, 4) dtype=float32_ref>", "<tf.Variable 'get_tr/fc_o/bias:0' shape=(4,) dtype=float32_ref>"] and loss Tensor("mean_squared_error/value:0", shape=(), dtype=float32).
下面是获取这个变换向量的代码。
def get_transform_vectors(self):
    """Build the network that predicts the translation vectors.

    ``self.img_train_1`` and ``self.img_train_2`` are concatenated along
    axis 3 and passed through five VALID-padded convolutions (the first
    four followed by a bias and a ReLU), then flattened and fed through
    three dense layers.

    Each bias holds one value per output channel. Sizing the bias from
    ``tf.shape(conv)`` instead would bake the batch size and the spatial
    size of the activations into the variable.

    Returns:
        The output of the last dense layer (``fc_o``): 4 values per
        example, no activation.
    """
    # Start of transformation prediction network
    image = tf.concat((self.img_train_1, self.img_train_2), 3)

    def conv(inputs, kernel_shape, stride, index):
        # VALID convolution with a truncated-normal kernel named w<index>.
        kernel = tf.Variable(
            tf.truncated_normal(shape=kernel_shape, stddev=0.1),
            name='w%d' % index)
        return tf.nn.conv2d(inputs,
                            filter=kernel,
                            strides=(1, stride, stride, 1),
                            padding="VALID",
                            name='conv%d' % index)

    # (kernel shape, spatial stride) of the stages followed by bias + ReLU.
    hidden_stages = [
        ([3, 3, 6, 64], 1),
        ([3, 3, 64, 64], 1),
        ([3, 3, 64, 128], 2),
        ([3, 3, 128, 128], 2),
    ]

    with tf.variable_scope('get_tr'):
        net = image
        for index, (kernel_shape, stride) in enumerate(hidden_stages, start=1):
            net = conv(net, kernel_shape, stride, index)
            # One bias per output channel; broadcasts over the other axes.
            bias = tf.Variable(
                tf.truncated_normal(shape=[kernel_shape[-1]], stddev=0.1),
                name='b%d' % index)
            net = tf.nn.relu(net + bias, name='conv%d_' % index)

        # The last convolution has neither bias nor activation.
        conv5 = conv(net, [5, 5, 128, 128], 1, 5)
        conv5_ = tf.contrib.layers.flatten(conv5)
        fc1 = tf.layers.dense(conv5_, 512, activation=tf.nn.relu, name='fc1')
        fc2 = tf.layers.dense(fc1, 1024, activation=tf.nn.relu, name='fc2')
        fc_o = tf.layers.dense(fc2, 4, name='fc_o')
    return fc_o
下面是平移图像并计算损失的代码。
# Columns 0:2 of the predicted vector shift the first image and
# columns 2:4 shift the second one.
# NOTE(review): the accompanying error report says no gradient reaches the
# network variables from this loss; the shift argument of
# tf.contrib.image.translate is presumably not differentiable -- confirm.
shift_1 = tf.cast(
    tf.reshape(self.tr_vector[:, 0:2], [self.batch_size, 2]),
    dtype=tf.float32)
shifted_1 = tf.contrib.image.translate(self.img_train_1, shift_1)
shift_2 = tf.cast(
    tf.reshape(self.tr_vector[:, 2:4], [self.batch_size, 2]),
    dtype=tf.float32)
shifted_2 = tf.contrib.image.translate(self.img_train_2, shift_2)

# The output image is the sum of the two shifted images.
self.img_o = shifted_1 + shifted_2
self.loss = tf.losses.mean_squared_error(self.img_o, self.img_label)
self.optim = tf.train.AdamOptimizer().minimize(self.loss)
由于您只关心对平移量的优化,我编写了下面这个函数来执行平移,并使用 @tf.custom_gradient 为平移向量定义梯度:
import tensorflow as tf
@tf.custom_gradient
def my_img_translate(imgs, translates):
    """Translate images and provide a gradient for the translation vectors.

    Args:
        imgs: Batch of images. Axis 1 is differenced for the y gradient
            and axis 2 for the x gradient, and the result is reduced over
            axes (1, 2, 3), so a rank-4 batch is assumed.
        translates: One shift per image; column 0 is used as the x shift
            and column 1 as the y shift.

    Returns:
        The translated images and the gradient function. The gradient
        function returns ``None`` for ``imgs`` and a finite-difference
        estimate for ``translates``.
    """
    # Interpolation mode has to be fixed due to limitations of tf.custom_gradient
    interpolation = 'NEAREST'
    imgs_translated = tf.contrib.image.translate(
        imgs, translates, interpolation=interpolation)

    def grad(img_translated_grads):
        # The output is img(x - tx, y - ty), so its derivative with respect
        # to tx (or ty) is the negated spatial derivative of the image
        # sampled at the same shifted position. Both derivative images are
        # therefore shifted by the full translation, not by one component.

        # X gradients: negated central differences, one-sided at the borders
        imgs_x_grad = tf.concat([
            imgs[:, :, :1] - imgs[:, :, 1:2],
            (imgs[:, :, :-2] - imgs[:, :, 2:]) / 2,
            imgs[:, :, -2:-1] - imgs[:, :, -1:],
        ], axis=2)
        imgs_x_grad_translated = tf.contrib.image.translate(
            imgs_x_grad, translates, interpolation=interpolation)
        translates_x_grad = tf.reduce_sum(
            img_translated_grads * imgs_x_grad_translated, axis=(1, 2, 3))

        # Y gradients: same scheme along axis 1
        imgs_y_grad = tf.concat([
            imgs[:, :1] - imgs[:, 1:2],
            (imgs[:, :-2] - imgs[:, 2:]) / 2,
            imgs[:, -2:-1] - imgs[:, -1:],
        ], axis=1)
        imgs_y_grad_translated = tf.contrib.image.translate(
            imgs_y_grad, translates, interpolation=interpolation)
        translates_y_grad = tf.reduce_sum(
            img_translated_grads * imgs_y_grad_translated, axis=(1, 2, 3))

        # Complete gradient: no gradient for the images, one (x, y) pair per image
        translates_grad = tf.stack([translates_x_grad, translates_y_grad], axis=1)
        return None, translates_grad

    return imgs_translated, grad
请注意,在这种情况下我没有为图像返回任何梯度,因为这些图像不会被优化(但如果需要,原则上可以使用平移操作的内置梯度)。
我用一个简单的用例对此进行了测试:平移图像,使其中心具有最高值:
import tensorflow as tf
import numpy as np
@tf.custom_gradient
def my_img_translate(imgs, translates):
    """Translate images and provide a gradient for the translation vectors.

    Args:
        imgs: Batch of images. Axis 1 is differenced for the y gradient
            and axis 2 for the x gradient, and the result is reduced over
            axes (1, 2, 3), so a rank-4 batch is assumed.
        translates: One shift per image; column 0 is used as the x shift
            and column 1 as the y shift.

    Returns:
        The translated images and the gradient function. The gradient
        function returns ``None`` for ``imgs`` and a finite-difference
        estimate for ``translates``.
    """
    # Interpolation mode has to be fixed due to limitations of tf.custom_gradient
    interpolation = 'NEAREST'
    imgs_translated = tf.contrib.image.translate(
        imgs, translates, interpolation=interpolation)

    def grad(img_translated_grads):
        # The output is img(x - tx, y - ty), so its derivative with respect
        # to tx (or ty) is the negated spatial derivative of the image
        # sampled at the same shifted position. Both derivative images are
        # therefore shifted by the full translation, not by one component.

        # X gradients: negated central differences, one-sided at the borders
        imgs_x_grad = tf.concat([
            imgs[:, :, :1] - imgs[:, :, 1:2],
            (imgs[:, :, :-2] - imgs[:, :, 2:]) / 2,
            imgs[:, :, -2:-1] - imgs[:, :, -1:],
        ], axis=2)
        imgs_x_grad_translated = tf.contrib.image.translate(
            imgs_x_grad, translates, interpolation=interpolation)
        translates_x_grad = tf.reduce_sum(
            img_translated_grads * imgs_x_grad_translated, axis=(1, 2, 3))

        # Y gradients: same scheme along axis 1
        imgs_y_grad = tf.concat([
            imgs[:, :1] - imgs[:, 1:2],
            (imgs[:, :-2] - imgs[:, 2:]) / 2,
            imgs[:, -2:-1] - imgs[:, -1:],
        ], axis=1)
        imgs_y_grad_translated = tf.contrib.image.translate(
            imgs_y_grad, translates, interpolation=interpolation)
        translates_y_grad = tf.reduce_sum(
            img_translated_grads * imgs_y_grad_translated, axis=(1, 2, 3))

        # Complete gradient: no gradient for the images, one (x, y) pair per image
        translates_grad = tf.stack([translates_x_grad, translates_y_grad], axis=1)
        return None, translates_grad

    return imgs_translated, grad
# Test operations
# The plotting code at the end uses plt, which the snippet never imported.
import matplotlib.pyplot as plt

imgs = tf.placeholder(tf.float32, [None, None, None, None])
# A single (x, y) shift shared by every image in the batch.
translates = tf.Variable([0, 0], dtype=tf.float32)
translates_tiled = tf.tile(translates[tf.newaxis], (tf.shape(imgs)[0], 1))
imgs_translated = my_img_translate(imgs, translates_tiled)
imgs_midpoint = imgs_translated[:, tf.shape(imgs_translated)[1] // 2, tf.shape(imgs_translated)[2] // 2]
# Minimizing the negated square maximizes the value at the image midpoint.
loss = -tf.reduce_sum(tf.square(imgs_midpoint))
train_op = tf.train.GradientDescentOptimizer(10).minimize(loss)
init_op = tf.global_variables_initializer()
with tf.Session() as sess:
    # Make test image: its maximum is where xs and ys are 0, which is not
    # the middle of either linspace range.
    xs, ys = np.meshgrid(np.linspace(-2, 1, 100), np.linspace(-1, 2, 75))
    test_img = (1 / (1 + np.square(xs))) * (1 / (1 + np.square(ys)))
    test_img /= np.max(test_img)
    test_img_batch = test_img[np.newaxis, :, :, np.newaxis]
    # Train
    sess.run(init_op)
    for _ in range(100):
        sess.run(train_op, feed_dict={imgs: test_img_batch})
    # Show result
    test_img_result = sess.run(imgs_translated, feed_dict={imgs: test_img_batch})[0, :, :, 0]
    plt.figure()
    plt.subplot(121)
    plt.imshow(test_img)
    plt.subplot(122)
    plt.imshow(test_img_result)
结果: