Tape gradient gives wrong output
I am trying to compute a gradient using tape.gradient(), but it gives me the wrong answer. The error is in the line u_z = tape.gradient(u, z, unconnected_gradients=tf.UnconnectedGradients.ZERO) and in the two lines that follow it in the code below. The function u is not constant in the variables z, f, t, yet evaluating tape.gradient(u, z) or tape.gradient(u, t) gives me a None object. If I instead pass unconnected_gradients=tf.UnconnectedGradients.ZERO as an argument, I get 0.0 as the derivative, which makes no sense. So one thing that may have gone wrong is that the network is disconnected, but I do not understand why that happens or how to fix it. I am using tensorflow 2.6.0 and keras 2.6.0. My code and the error message are below.
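The symptom can be reproduced in isolation (an illustrative reduction; a single Dense layer stands in for my network):

import tensorflow as tf

# Illustrative reduction of the problem: a tiny network, with inputs sliced
# from X exactly as in my full code below.
X = tf.random.uniform((4, 3))
z, f, t = X[:, 0:1], X[:, 1:2], X[:, 2:3]
net = tf.keras.layers.Dense(1)

with tf.GradientTape() as tape:
    u = net(tf.concat([z, f, t], axis=1))

print(tape.gradient(u, z))  # prints None, as if u did not depend on z

My full code: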
import tensorflow as tf
import numpy as np
from tensorflow import keras
import os
from tqdm import trange
import matplotlib.pyplot as plt

# Switch off unnecessary TF warning messages
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'


class Model():

    def __init__(self):
        self.optimizer = keras.optimizers.Adam()
        self.initializer = tf.keras.initializers.HeNormal()
        self.batchSize = 500
        self.number_epochs = 5000

    def NN(self, num_layers=3, num_neurons=30):
        # Fully connected network with tanh hidden layers and a linear output
        model_ = keras.models.Sequential()
        model_.add(keras.layers.Dense(num_neurons, activation='tanh', input_dim=3, kernel_initializer=self.initializer))
        for layer in range(num_layers - 1):
            model_.add(keras.layers.Dense(num_neurons, activation='tanh', kernel_initializer=self.initializer))
        model_.add(keras.layers.Dense(1, kernel_initializer=self.initializer))
        return model_

    def solve_pde(self, value_function, X, idx):
        z, f, t = X[:, 0:1], X[:, 1:2], X[:, 2:3]
        with tf.GradientTape(persistent=True) as tape:
            u = value_function(tf.concat([z, f, t], axis=1))
        u_z = tape.gradient(u, z, unconnected_gradients=tf.UnconnectedGradients.ZERO)
        u_zz = tape.gradient(u_z, z, unconnected_gradients=tf.UnconnectedGradients.ZERO)
        u_t = tape.gradient(u, t)
        u_pde = u_t + u_z + u_zz - tf.cast(0.5, dtype=tf.float32) * u
        return u_pde

    def loss_function(self, batchSize):
        # Cartesian product of the z and f grids: each f value is paired
        # with every z value
        z = tf.linspace(0.001, 0.999, 200)
        f = tf.linspace(0.1, 0.2, 20)
        z_tile = tf.tile(tf.expand_dims(z, axis=-1), multiples=[20, 1])
        f_tile = tf.reshape(tf.repeat(f, 200), [-1, 1])
        dt = 0.9
        X = tf.concat((z_tile, f_tile, tf.reshape(tf.repeat(dt, z_tile.shape[0]), [-1, 1])), axis=1)
        X_pde = tf.concat((z_tile, f_tile, tf.random.uniform(shape=(z_tile.shape[0], 1), minval=0, maxval=dt)), axis=1)
        x_star = tf.concat((z_tile, f_tile, tf.reshape(tf.repeat(0.0, z_tile.shape[0]), [-1, 1])), axis=1)
        idx = np.random.choice(X.shape[0], batchSize, replace=True)
        loss_e = self.solve_pde(self.value_function_e, X_pde, idx)
        self.value_updated = self.value_function_e(tf.concat([x_star[:, 0:1], x_star[:, 1:2], x_star[:, 2:3]], axis=1)).numpy().reshape(self.innerStep.Nz, self.innerStep.Nf).transpose()
        return loss_e

    @tf.function
    def training_step(self):
        with tf.GradientTape(persistent=True) as tape:
            loss_e = self.loss_function(self.batchSize)
        grads_valueE = tape.gradient(loss_e, self.theta_valueFunction_e)
        self.optimizer.apply_gradients(zip(grads_valueE, self.theta_valueFunction_e))
        return loss_e

    def train_model(self):
        self.value_function_e = self.NN()
        self.theta_valueFunction_e = self.value_function_e.trainable_variables
        self.LVF = []
        for epoch in trange(self.number_epochs):
            print(epoch)
            loss_e = self.training_step()
            self.LVF.append(loss_e.numpy())


if __name__ == "__main__":
    ext = Model()
    ext.train_model()
The error message, together with the full traceback, is
Traceback (most recent call last):
File "<ipython-input-26-f5a127c3c9ae>", line 1, in <module>
runfile('C:/Users/user/Google Drive/S/Research Project4/trial.py', wdir='C:/Users/user/Google Drive/SFI/Research Project4')
File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 827, in runfile
execfile(filename, namespace)
File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 110, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Users/user/Google Drive/SFI/Research Project4/trial.py", line 85, in <module>
ext.train_model()
File "C:/Users/user/Google Drive/SFI/Research Project4/trial.py", line 79, in train_model
loss_e = self.training_step()
File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\eager\def_function.py", line 862, in __call__
return self._python_function(*args, **kwds)
File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\eager\function.py", line 3985, in bound_method_wrapper
return wrapped_fn(weak_instance(), *args, **kwargs)
File "C:/Users/user/Google Drive/SFI/Research Project4/trial.py", line 65, in training_step
loss_e = self.loss_function(self.batchSize)
File "C:/Users/user/Google Drive/SFI/Research Project4/trial.py", line 58, in loss_function
loss_e = self.solve_pde(self.value_function_e,X_pde,idx)
File "C:/Users/user/Google Drive/SFI/Research Project4/trial.py", line 34, in solve_pde
u_pde = u_t + u_z + u_zz - tf.cast(0.5,dtype=tf.float32) * u
File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\ops\math_ops.py", line 1399, in r_binary_op_wrapper
y, x = maybe_promote_tensors(y, x)
File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\ops\math_ops.py", line 1335, in maybe_promote_tensors
ops.convert_to_tensor(tensor, dtype, name="x"))
File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\profiler\trace.py", line 163, in wrapped
return func(*args, **kwargs)
File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 1566, in convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\framework\constant_op.py", line 346, in _constant_tensor_conversion_function
return constant(v, dtype=dtype, name=name)
File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\framework\constant_op.py", line 272, in constant
allow_broadcast=True)
File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\framework\constant_op.py", line 283, in _constant_impl
return _constant_eager_impl(ctx, value, dtype, shape, verify_shape)
File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\framework\constant_op.py", line 308, in _constant_eager_impl
t = convert_to_eager_tensor(value, ctx, dtype)
File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\framework\constant_op.py", line 106, in convert_to_eager_tensor
return ops.EagerTensor(value, ctx.device_name, dtype)
ValueError: Attempt to convert a value (None) with an unsupported type (<class 'NoneType'>) to a Tensor.
Any help is much appreciated. Thank you.
There are two problems in your code that keep you from getting the result you want.
- If you want to compute higher-order derivatives, you have to create nested GradientTape objects.
- A GradientTape automatically tracks the variables used inside its context; if you want it to track plain tensors (in your case z and t, which are slices of X), you have to call tape.watch(<my_tensor>), otherwise you will have no gradients. This is also why unconnected_gradients=tf.UnconnectedGradients.ZERO gave you 0.0: u and the unwatched slices are simply not connected on the tape, and ZERO only replaces the resulting None with zeros.
Fixed code:
def solve_pde(self, value_function, X, idx):
    z, f, t = X[:, 0:1], X[:, 1:2], X[:, 2:3]
    # Outer tape for the second derivative, inner tape for the first ones;
    # watch the plain tensors explicitly so gradients flow to them.
    with tf.GradientTape(persistent=True) as tape:
        tape.watch(z)
        with tf.GradientTape(persistent=True) as tape2:
            tape2.watch(z)
            tape2.watch(t)
            u = value_function(tf.concat([z, f, t], axis=1))
        u_z = tape2.gradient(u, z)
    u_zz = tape.gradient(u_z, z)
    u_t = tape2.gradient(u, t)
    u_pde = u_t + u_z + u_zz - tf.cast(0.5, dtype=tf.float32) * u
    return u_pde
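As a standalone sanity check of this nested-tape pattern (not part of your model; u = z**3 is used so the derivatives are known analytically):

import tensorflow as tf

# For u = z**3 we expect u_z = 3*z**2 and u_zz = 6*z.
z = tf.constant([[0.5], [1.0], [2.0]])

with tf.GradientTape() as tape:
    tape.watch(z)
    with tf.GradientTape() as tape2:
        tape2.watch(z)
        u = z ** 3
    u_z = tape2.gradient(u, z)  # [[0.75], [3.], [12.]]
u_zz = tape.gradient(u_z, z)    # [[3.], [6.], [12.]]

print(u_z.numpy().ravel(), u_zz.numpy().ravel())

Swap z ** 3 for your network and both derivatives come back as real tensors instead of None.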
More about gradient tapes can be found in the official documentation: https://www.tensorflow.org/api_docs/python/tf/GradientTape