使用 tf.hessians 计算 keras 模型的 Hessian 矩阵
hessian matrix of a keras model with tf.hessians
我想在图模式(graph mode)下使用 tf.hessians
计算 keras 模型关于其输入的 Hessian 矩阵。
下面是一个最小示例:
import tensorflow as tf
from tensorflow import keras
# Minimal model for the example: a 10-feature input followed by a single
# Dense unit with no activation argument.
model = keras.Sequential(
    [
        keras.Input((10,)),
        keras.layers.Dense(1),
    ]
)
model.summary()
@tf.function
def get_grads(inputs):
    """Return the gradient of the summed model output with respect to `inputs`.

    The model output is reduced to a scalar with `tf.reduce_sum` and passed to
    `tf.gradients`; the result of that call is returned unchanged.
    """
    loss = tf.reduce_sum(model(inputs))
    return tf.gradients(loss, inputs)
@tf.function
def get_hessian(inputs):
    """Return `tf.hessians` of the summed model output with respect to `inputs`.

    NOTE: with the single `Dense(1)` model of this example the call is
    reported to fail with "ValueError: None values not supported.". That
    failure is the behaviour being asked about, so it is deliberately not
    worked around here.
    """
    loss = tf.reduce_sum(model(inputs))
    return tf.hessians(loss, inputs)
# Reproduction driver: run the forward pass, the gradient and finally the
# Hessian on one random batch of shape (batch_size, 10).
batch_size = 3
test_input = tf.random.uniform((batch_size, 10))
out = model(test_input) # works fine
grads = get_grads(test_input) # works fine
hessian = get_hessian(test_input) # raises ValueError: None values not supported.
前向传播和 get_grads 函数都工作正常,
但 get_hessian 函数会引发 ValueError: None values not supported。
而在下面这个例子中,
@tf.function
def get_hessian_(inputs):
    """Return `tf.hessians` of `sum(inputs ** 2)` with respect to `inputs`."""
    loss = tf.reduce_sum(inputs**2)
    return tf.hessians(loss, inputs)


# [0] selects the first entry of the returned result; the output observed
# for a length-3 input is shown below.
get_hessian_(tf.random.uniform((3,)))[0]
# <tf.Tensor: shape=(3, 3), dtype=float32, numpy=
# array([[2., 0., 0.],
#        [0., 2., 0.],
#        [0., 0., 2.]], dtype=float32)>
tf.hessians 产生了预期的结果,没有报错。
在您的代码示例中,您试图计算 f(x)(模型输出)关于 x(输入)的 Hessian 矩阵,
而 f 是线性的(模型是线性的)。
线性函数的梯度是与 x 无关的常数,因此对该梯度再次求导时,它与输入之间没有连接,得到的是 None。
f(x) 关于 x 的 Hessian 实际上应该是零张量,但 tf.hessians
无法正确处理这种情况,因而导致错误。添加一个带非线性激活的附加层即可消除该错误。
代码示例:
使用 tf.hessians 计算 Hessian:
# Two-layer model. `Dense` is referenced through `tf.keras.layers` because
# only `tf` and `keras` are imported in this file.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(10, activation='sigmoid'),  # remove this line and you will get error
    tf.keras.layers.Dense(1),
])
@tf.function
def get_hessian(inputs):
    """Return `tf.hessians` of the summed model output with respect to `inputs`.

    The model output is reduced to a scalar with `tf.reduce_sum` first; the
    result of `tf.hessians` is returned unchanged.
    """
    loss = tf.reduce_sum(model(inputs))
    return tf.hessians(loss, inputs)
batch_size = 3
tf.random.set_seed(123)  # fix TensorFlow's global random seed
# The batch dimension comes from batch_size instead of a repeated literal 3.
test_input = tf.random.uniform((batch_size, 10), minval=1.5, maxval=2.5)
hessian = get_hessian(test_input)
print(type(hessian))
print(len(hessian))
print(hessian[0].shape)
print(hessian[0][0, 0, 0, 0])
print(hessian[0][0, 0, 0, 1])
# Output reported for the prints above:
'''
<class 'list'>
1
(3, 10, 3, 10)
tf.Tensor(0.0028595054, shape=(), dtype=float32)
tf.Tensor(0.0009458237, shape=(), dtype=float32)
'''
使用 tf.GradientTape() 计算 Hessian:
# Two-layer model. `Dense` is referenced through `tf.keras.layers` because
# only `tf` and `keras` are imported in this file.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(10, activation='sigmoid'),  # remove this line and get_hessian return None
    tf.keras.layers.Dense(1),
])
@tf.function
def get_hessian(inputs):
    """Return the Jacobian of the input-gradient (the Hessian) using nested tapes.

    The inner tape differentiates the summed model output with respect to
    `inputs`; the outer tape then takes the Jacobian of that gradient with
    respect to `inputs`. The result of `t2.jacobian` is returned as is; the
    calling code in this example checks it against None before using it.
    """
    with tf.GradientTape() as t2:
        t2.watch(inputs)
        with tf.GradientTape() as t1:
            t1.watch(inputs)
            loss = tf.reduce_sum(model(inputs))
        # Computed inside t2's context so the outer tape records this
        # gradient computation and can differentiate it again.
        g = t1.gradient(loss, inputs)
    return t2.jacobian(g, inputs)
batch_size = 3
tf.random.set_seed(123)  # fix TensorFlow's global random seed
# The batch dimension comes from batch_size instead of a repeated literal 3.
test_input = tf.random.uniform((batch_size, 10), minval=1.5, maxval=2.5)
hessian = get_hessian(test_input)
print(type(hessian))
# get_hessian may return None, so each access is guarded.
print(hessian.shape if hessian is not None else None)
print(hessian[0, 0, 0, 0] if hessian is not None else None)
print(hessian[0, 0, 0, 1] if hessian is not None else None)
# Output reported for the prints above:
'''
<class 'tensorflow.python.framework.ops.EagerTensor'>
(3, 10, 3, 10)
tf.Tensor(0.0028595058, shape=(), dtype=float32)
tf.Tensor(0.0009458238, shape=(), dtype=float32)
'''
如果你想得到一个零张量,可以使用 unconnected_gradients=tf.UnconnectedGradients.ZERO:
# Single Dense layer only. `Dense` is referenced through `tf.keras.layers`
# because only `tf` and `keras` are imported in this file.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(1),
])
@tf.function
def get_hessian(inputs):
    """Return the Hessian via nested tapes, requesting zeros for unconnected gradients.

    Same nested-tape scheme as a plain gradient-of-gradient, but both the
    inner `gradient` call and the outer `jacobian` call pass
    `unconnected_gradients=tf.UnconnectedGradients.ZERO`.
    """
    zero = tf.UnconnectedGradients.ZERO
    with tf.GradientTape() as t2:
        t2.watch(inputs)
        with tf.GradientTape() as t1:
            t1.watch(inputs)
            loss = tf.reduce_sum(model(inputs))
        # Computed inside t2's context so the outer tape records this
        # gradient computation.
        g = t1.gradient(loss, inputs, unconnected_gradients=zero)
    return t2.jacobian(g, inputs, unconnected_gradients=zero)
batch_size = 3
tf.random.set_seed(123)  # fix TensorFlow's global random seed
# The batch dimension comes from batch_size instead of a repeated literal 3.
test_input = tf.random.uniform((batch_size, 10), minval=1.5, maxval=2.5)
hessian = get_hessian(test_input)
print(type(hessian))
print(hessian.shape)
print(tf.math.count_nonzero(hessian))
# Output reported for the prints above:
'''
<class 'tensorflow.python.framework.ops.EagerTensor'>
(3, 10, 3, 10)
tf.Tensor(0, shape=(), dtype=int64)
'''
我想在图模式(graph mode)下使用 tf.hessians
计算 keras 模型关于其输入的 Hessian 矩阵。
下面是一个最小示例:
import tensorflow as tf
from tensorflow import keras
# Minimal model for the example: a 10-feature input followed by a single
# Dense unit with no activation argument.
model = keras.Sequential(
    [
        keras.Input((10,)),
        keras.layers.Dense(1),
    ]
)
model.summary()
@tf.function
def get_grads(inputs):
    """Return the gradient of the summed model output with respect to `inputs`.

    The model output is reduced to a scalar with `tf.reduce_sum` and passed to
    `tf.gradients`; the result of that call is returned unchanged.
    """
    loss = tf.reduce_sum(model(inputs))
    return tf.gradients(loss, inputs)
@tf.function
def get_hessian(inputs):
    """Return `tf.hessians` of the summed model output with respect to `inputs`.

    NOTE: with the single `Dense(1)` model of this example the call is
    reported to fail with "ValueError: None values not supported.". That
    failure is the behaviour being asked about, so it is deliberately not
    worked around here.
    """
    loss = tf.reduce_sum(model(inputs))
    return tf.hessians(loss, inputs)
# Reproduction driver: run the forward pass, the gradient and finally the
# Hessian on one random batch of shape (batch_size, 10).
batch_size = 3
test_input = tf.random.uniform((batch_size, 10))
out = model(test_input) # works fine
grads = get_grads(test_input) # works fine
hessian = get_hessian(test_input) # raises ValueError: None values not supported.
前向传播和 get_grads 函数都工作正常,
但 get_hessian 函数会引发 ValueError: None values not supported。
而在下面这个例子中,
@tf.function
def get_hessian_(inputs):
    """Return `tf.hessians` of `sum(inputs ** 2)` with respect to `inputs`."""
    loss = tf.reduce_sum(inputs**2)
    return tf.hessians(loss, inputs)


# [0] selects the first entry of the returned result; the output observed
# for a length-3 input is shown below.
get_hessian_(tf.random.uniform((3,)))[0]
# <tf.Tensor: shape=(3, 3), dtype=float32, numpy=
# array([[2., 0., 0.],
#        [0., 2., 0.],
#        [0., 0., 2.]], dtype=float32)>
tf.hessians 产生了预期的结果,没有报错。
在您的代码示例中,您试图计算 f(x)(模型输出)关于 x(输入)的 Hessian 矩阵,
而 f 是线性的(模型是线性的)。
线性函数的梯度是与 x 无关的常数,因此对该梯度再次求导时,它与输入之间没有连接,得到的是 None。
f(x) 关于 x 的 Hessian 实际上应该是零张量,但 tf.hessians
无法正确处理这种情况,因而导致错误。添加一个带非线性激活的附加层即可消除该错误。
代码示例:
使用 tf.hessians 计算 Hessian:
# Two-layer model. `Dense` is referenced through `tf.keras.layers` because
# only `tf` and `keras` are imported in this file.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(10, activation='sigmoid'),  # remove this line and you will get error
    tf.keras.layers.Dense(1),
])
@tf.function
def get_hessian(inputs):
    """Return `tf.hessians` of the summed model output with respect to `inputs`.

    The model output is reduced to a scalar with `tf.reduce_sum` first; the
    result of `tf.hessians` is returned unchanged.
    """
    loss = tf.reduce_sum(model(inputs))
    return tf.hessians(loss, inputs)
batch_size = 3
tf.random.set_seed(123)  # fix TensorFlow's global random seed
# The batch dimension comes from batch_size instead of a repeated literal 3.
test_input = tf.random.uniform((batch_size, 10), minval=1.5, maxval=2.5)
hessian = get_hessian(test_input)
print(type(hessian))
print(len(hessian))
print(hessian[0].shape)
print(hessian[0][0, 0, 0, 0])
print(hessian[0][0, 0, 0, 1])
# Output reported for the prints above:
'''
<class 'list'>
1
(3, 10, 3, 10)
tf.Tensor(0.0028595054, shape=(), dtype=float32)
tf.Tensor(0.0009458237, shape=(), dtype=float32)
'''
使用 tf.GradientTape() 计算 Hessian:
# Two-layer model. `Dense` is referenced through `tf.keras.layers` because
# only `tf` and `keras` are imported in this file.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(10, activation='sigmoid'),  # remove this line and get_hessian return None
    tf.keras.layers.Dense(1),
])
@tf.function
def get_hessian(inputs):
    """Return the Jacobian of the input-gradient (the Hessian) using nested tapes.

    The inner tape differentiates the summed model output with respect to
    `inputs`; the outer tape then takes the Jacobian of that gradient with
    respect to `inputs`. The result of `t2.jacobian` is returned as is; the
    calling code in this example checks it against None before using it.
    """
    with tf.GradientTape() as t2:
        t2.watch(inputs)
        with tf.GradientTape() as t1:
            t1.watch(inputs)
            loss = tf.reduce_sum(model(inputs))
        # Computed inside t2's context so the outer tape records this
        # gradient computation and can differentiate it again.
        g = t1.gradient(loss, inputs)
    return t2.jacobian(g, inputs)
batch_size = 3
tf.random.set_seed(123)  # fix TensorFlow's global random seed
# The batch dimension comes from batch_size instead of a repeated literal 3.
test_input = tf.random.uniform((batch_size, 10), minval=1.5, maxval=2.5)
hessian = get_hessian(test_input)
print(type(hessian))
# get_hessian may return None, so each access is guarded.
print(hessian.shape if hessian is not None else None)
print(hessian[0, 0, 0, 0] if hessian is not None else None)
print(hessian[0, 0, 0, 1] if hessian is not None else None)
# Output reported for the prints above:
'''
<class 'tensorflow.python.framework.ops.EagerTensor'>
(3, 10, 3, 10)
tf.Tensor(0.0028595058, shape=(), dtype=float32)
tf.Tensor(0.0009458238, shape=(), dtype=float32)
'''
如果你想得到一个零张量,可以使用 unconnected_gradients=tf.UnconnectedGradients.ZERO:
# Single Dense layer only. `Dense` is referenced through `tf.keras.layers`
# because only `tf` and `keras` are imported in this file.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(1),
])
@tf.function
def get_hessian(inputs):
    """Return the Hessian via nested tapes, requesting zeros for unconnected gradients.

    Same nested-tape scheme as a plain gradient-of-gradient, but both the
    inner `gradient` call and the outer `jacobian` call pass
    `unconnected_gradients=tf.UnconnectedGradients.ZERO`.
    """
    zero = tf.UnconnectedGradients.ZERO
    with tf.GradientTape() as t2:
        t2.watch(inputs)
        with tf.GradientTape() as t1:
            t1.watch(inputs)
            loss = tf.reduce_sum(model(inputs))
        # Computed inside t2's context so the outer tape records this
        # gradient computation.
        g = t1.gradient(loss, inputs, unconnected_gradients=zero)
    return t2.jacobian(g, inputs, unconnected_gradients=zero)
batch_size = 3
tf.random.set_seed(123)  # fix TensorFlow's global random seed
# The batch dimension comes from batch_size instead of a repeated literal 3.
test_input = tf.random.uniform((batch_size, 10), minval=1.5, maxval=2.5)
hessian = get_hessian(test_input)
print(type(hessian))
print(hessian.shape)
print(tf.math.count_nonzero(hessian))
# Output reported for the prints above:
'''
<class 'tensorflow.python.framework.ops.EagerTensor'>
(3, 10, 3, 10)
tf.Tensor(0, shape=(), dtype=int64)
'''