在 TensorFlow 中计算关于多个变量的 Hessian 矩阵

Compute hessian with respect to several variables in tensorflow

在 tensorflow 中计算 Hessian 非常简单:

# A single variable holds all three values that f depends on.
x = tf.Variable([1., 1., 1.], dtype=tf.float32, name="x")
f = (x[0] + x[1] ** 2 + x[0] * x[1] + x[2]) ** 2
# With one variable, tf.hessians yields the complete 3x3 matrix of
# second derivatives.
hessian = tf.hessians(f, x)

这会正确地返回:

[[ 8., 20.,  4.],
   [20., 34.,  6.],
   [ 4.,  6.,  2.]]

在我的真实案例中,我需要把这三个值拆分到两个变量中,而不是用一个变量 x 来保存:x(保存前两个值)和 y(保存最后一个值)

# Same function as before, but the three values are now split across two
# variables: x holds the first two and y holds the last one.
x = tf.Variable([1., 1.], dtype=tf.float32, name="x")
y = tf.Variable([1.], dtype=tf.float32, name="y")
f = (x[0] + x[1] ** 2 + x[0] * x[1] + y) ** 2

我尝试了一种最直接的做法:

# Passing a list yields one Hessian per variable (the x-x and y-y blocks);
# the mixed x-y second derivatives are not part of the result.
hessian = tf.hessians(f, [x, y])

但我得到:[[ 8., 20.], [20., 34.]], [[2.]]

我也试过:

# NOTE: xy is a new tensor built from x and y after f was defined, so f was
# not computed from xy.
xy = tf.concat([x, y], axis=-1)

但是在定义 Hessian 时

# Raises ValueError: the gradient of f with respect to xy is None, and
# tf.hessians then tries to reshape that None value.
hessian = tf.hessians(f, xy)

我得到了一个非常冗长、难以理解的错误:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
    510                 as_ref=input_arg.is_ref,
--> 511                 preferred_dtype=default_dtype)
    512           except TypeError as err:

~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/ops.py in internal_convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, ctx, accept_symbolic_tensors)
   1174     if ret is None:
-> 1175       ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
   1176 

~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in _constant_tensor_conversion_function(v, dtype, name, as_ref)
    303   _ = as_ref
--> 304   return constant(v, dtype=dtype, name=name)
    305 

~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in constant(value, dtype, shape, name)
    244   return _constant_impl(value, dtype, shape, name, verify_shape=False,
--> 245                         allow_broadcast=True)
    246 

~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in _constant_impl(value, dtype, shape, name, verify_shape, allow_broadcast)
    282           value, dtype=dtype, shape=shape, verify_shape=verify_shape,
--> 283           allow_broadcast=allow_broadcast))
    284   dtype_value = attr_value_pb2.AttrValue(type=tensor_value.tensor.dtype)

~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/tensor_util.py in make_tensor_proto(values, dtype, shape, verify_shape, allow_broadcast)
    453     if values is None:
--> 454       raise ValueError("None values not supported.")
    455     # if dtype is provided, forces numpy array to be the type

ValueError: None values not supported.

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
    524               observed = ops.internal_convert_to_tensor(
--> 525                   values, as_ref=input_arg.is_ref).dtype.name
    526             except ValueError as err:

~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/ops.py in internal_convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, ctx, accept_symbolic_tensors)
   1174     if ret is None:
-> 1175       ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
   1176 

~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in _constant_tensor_conversion_function(v, dtype, name, as_ref)
    303   _ = as_ref
--> 304   return constant(v, dtype=dtype, name=name)
    305 

~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in constant(value, dtype, shape, name)
    244   return _constant_impl(value, dtype, shape, name, verify_shape=False,
--> 245                         allow_broadcast=True)
    246 

~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/constant_op.py in _constant_impl(value, dtype, shape, name, verify_shape, allow_broadcast)
    282           value, dtype=dtype, shape=shape, verify_shape=verify_shape,
--> 283           allow_broadcast=allow_broadcast))
    284   dtype_value = attr_value_pb2.AttrValue(type=tensor_value.tensor.dtype)

~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/tensor_util.py in make_tensor_proto(values, dtype, shape, verify_shape, allow_broadcast)
    453     if values is None:
--> 454       raise ValueError("None values not supported.")
    455     # if dtype is provided, forces numpy array to be the type

ValueError: None values not supported.

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
<ipython-input-358-70bce7e5d400> in <module>
      3 f = (x[0] + x[1] ** 2 + x[0] * x[1] + y) ** 2
      4 xy = tf.concat([x, y], axis=-1)
----> 5 hessian = tf.hessians(f, xy)

~/venv3/lib/python3.7/site-packages/tensorflow/python/ops/gradients_impl.py in hessians(ys, xs, name, colocate_gradients_with_ops, gate_gradients, aggregation_method)
   1405   for gradient, x in zip(_gradients, xs):
   1406     # change shape to one-dimension without graph branching
-> 1407     gradient = array_ops.reshape(gradient, [-1])
   1408 
   1409     # Declare an iterator and tensor array loop variables for the gradients.

~/venv3/lib/python3.7/site-packages/tensorflow/python/ops/gen_array_ops.py in reshape(tensor, shape, name)
   7178   try:
   7179     _, _, _op = _op_def_lib._apply_op_helper(
-> 7180         "Reshape", tensor=tensor, shape=shape, name=name)
   7181   except (TypeError, ValueError):
   7182     result = _dispatch.dispatch(

~/venv3/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
    527               raise ValueError(
    528                   "Tried to convert '%s' to a tensor and failed. Error: %s" %
--> 529                   (input_name, err))
    530             prefix = ("Input '%s' of '%s' Op has type %s that does not match" %
    531                       (input_name, op_type_name, observed))

ValueError: Tried to convert 'tensor' to a tensor and failed. Error: None values not supported.


1

编辑:这是一个更充实的解决方案,本质上是相同的,但适用于任意数量的变量。此外,我还为雅可比矩阵添加了使用 Python 或 TensorFlow 循环的选项。请注意,代码假定所有变量都是一维张量。

from itertools import combinations, count
import tensorflow as tf

def jacobian(y, x, tf_loop=False):
    """Build the Jacobian matrix of ``y`` with respect to ``x``.

    Row ``i`` of the result is the gradient of ``y[i]`` with respect to
    ``x``; the rows are stacked along axis 0. ``y`` is only indexed along
    its first axis, so it is assumed to be a 1-D tensor.

    Args:
        y: Tensor whose elements are differentiated (assumed 1-D).
        x: Tensor or variable to differentiate with respect to.
        tf_loop: If true, build the rows with a ``tf.while_loop`` instead of
            a Python-level loop. The TensorFlow loop is always used when the
            number of elements of ``y`` is not statically known.

    Returns:
        A tensor whose ``i``-th slice along axis 0 is ``d y[i] / d x``.
    """
    def gradient_row(i):
        # tf.gradients yields None when y[i] has no graph dependency on x.
        # The derivative is then identically zero, so substitute zeros
        # instead of letting stack()/write() fail on a None value.
        grad = tf.gradients(y[i], x)[0]
        return tf.zeros_like(x) if grad is None else grad

    # If the shape of Y is fully defined you can choose between a
    # Python-level or TF-level loop to make the Jacobian matrix.
    # If the shape of Y is not fully defined you must use the TF loop.
    # In both cases it is just a matter of stacking gradients for each Y.
    if tf_loop or y.shape.num_elements() is None:
        i = tf.constant(0, dtype=tf.int32)
        y_size = tf.size(y)
        rows = tf.TensorArray(dtype=y.dtype, size=y_size, element_shape=x.shape)
        _, rows = tf.while_loop(
            lambda i, rows: i < y_size,
            lambda i, rows: [i + 1, rows.write(i, gradient_row(i))],
            [i, rows])
        return rows.stack()
    else:
        return tf.stack([gradient_row(i)
                         for i in range(y.shape.num_elements())], axis=0)

def hessian_multivar(ys, xs, tf_loop=False):
    """Assemble the full Hessian of ``ys`` over all variables in ``xs``.

    The result is built block by block: the diagonal blocks come from
    ``tf.hessians`` and every off-diagonal block is the Jacobian of one
    variable's first-order gradient with respect to another variable. The
    blocks are then concatenated into one matrix. Each variable in ``xs``
    is assumed to be a 1-D tensor.

    Args:
        ys: Tensor to differentiate twice.
        xs: Sequence of variables to differentiate with respect to.
        tf_loop: Forwarded to ``jacobian`` to select a TensorFlow-level loop.

    Returns:
        A 2-D tensor containing every block of the Hessian, laid out in the
        order of ``xs``.
    """
    n_vars = len(xs)
    # Square grid of blocks; entry [r][c] holds d2(ys) / d(xs[r]) d(xs[c]).
    blocks = [[None for _ in range(n_vars)] for _ in range(n_vars)]

    # Diagonal blocks: the per-variable Hessians.
    for k, diag_block in enumerate(tf.hessians(ys, xs)):
        blocks[k][k] = diag_block

    # First-order gradients, one per variable.
    grads = tf.gradients(ys, xs)

    # Off-diagonal blocks: for each unordered pair of variables, fill in
    # both orderings as Jacobians of a gradient.
    for a, b in combinations(range(n_vars), 2):
        blocks[a][b] = jacobian(grads[a], xs[b], tf_loop=tf_loop)
        blocks[b][a] = jacobian(grads[b], xs[a], tf_loop=tf_loop)

    # Join each row of blocks horizontally, then stack the rows vertically.
    block_rows = [tf.concat(row, axis=1) for row in blocks]
    return tf.concat(block_rows, axis=0)

# Test it with three variables
# Demo with three variables: build everything in a fresh graph, then
# evaluate the combined Hessian in a session and print it.
with tf.Graph().as_default():
    x = tf.Variable([1.0, 1.0], dtype=tf.float32, name="x")
    y = tf.Variable([1.0], dtype=tf.float32, name="y")
    z = tf.Variable([1.0, 1.0], dtype=tf.float32, name="z")
    f = (x[0] + x[1] ** 2 + x[0] * x[1] + y + x * y * z) ** 2
    hessian = hessian_multivar(f, [x, y, z])
    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        # Variables must be initialized before the Hessian can be evaluated.
        sess.run(init_op)
        hessian_value = sess.run(hessian)
        print(hessian_value)

输出:

[[26. 54. 30. 16.  4.]
 [54. 90. 38.  6. 18.]
 [30. 38. 16. 14. 14.]
 [16.  6. 14.  2.  0.]
 [ 4. 18. 14.  0.  2.]]

我不确定当前的 API 是否提供了实现这一点的“好”方法。当然,您可以自己逐块计算 Hessian 矩阵的元素……这不是很优雅,也可能不是最快的解决方案,但对于您的示例可以这样做:

import tensorflow as tf

x = tf.Variable([1.0, 1.0], dtype=tf.float32, name="x")
y = tf.Variable([1.0], dtype=tf.float32, name="y")
f = (x[0] + x[1] ** 2 + x[0] * x[1] + y) ** 2

# Diagonal blocks of the Hessian: x-x and y-y.
h_xx, h_yy = tf.hessians(f, [x, y])
# First-order derivatives with respect to x and y.
grad_x, grad_y = tf.gradients(f, [x, y])

# The remaining elements of the Hessian are Jacobian matrices of the
# first-order derivatives with respect to the other variable. TensorFlow
# does not implement this
# (https://github.com/tensorflow/tensorflow/issues/675),
# so they have to be built "by hand", one gradient per element of grad_x.
h_xy = tf.concat(
    [tf.gradients(grad_x[i], y)[0] for i in range(x.shape.num_elements())],
    axis=0)
# y has a single element, so one gradient call gives the whole y-x block.
h_yx = tf.gradients(grad_y, x)[0]

# Assemble the blocks: [h_xx | h_xy] on top of [h_yx | h_yy].
top_rows = tf.concat([h_xx, tf.expand_dims(h_xy, 1)], axis=1)
bottom_rows = tf.concat([tf.expand_dims(h_yx, 0), h_yy], axis=1)
hessian = tf.concat([top_rows, bottom_rows], axis=0)

# Evaluate and print the result.
init_op = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init_op)
    hessian_value = sess.run(hessian)
    print(hessian_value)

输出:

[[ 8. 20.  4.]
 [20. 34.  6.]
 [ 4.  6.  2.]]