Tape gradient gives wrong output

I am trying to compute a gradient with tape.gradient(), but it gives me the wrong answer. The error occurs at the line u_z = tape.gradient(u, z, unconnected_gradients=tf.UnconnectedGradients.ZERO) and at the two lines that follow it in the code below. The function u is not constant in the variables z, f, t, yet evaluating tape.gradient(u, z) or tape.gradient(u, t) gives me a None object. If I instead pass unconnected_gradients=tf.UnconnectedGradients.ZERO as an argument, I get 0.0 as the derivative, which makes no sense. So one thing that may have gone wrong is that the network is disconnected, but I do not understand why this happens or how to fix it. I am using tensorflow 2.6.0 and keras 2.6.0. The code and the error message are given below.
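For what it is worth, I can reproduce the None in a toy case that has nothing to do with my model (a minimal, made-up sketch):

import tensorflow as tf

x = tf.constant([[2.0]])     # a plain tf.Tensor, not a tf.Variable
with tf.GradientTape() as tape:
    y = x * x                # computed inside the tape context
print(tape.gradient(y, x))   # prints None, even though y clearly depends on x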

import tensorflow as tf
import numpy as np
from tensorflow import keras
import os
from tqdm import trange
import matplotlib.pyplot as plt
# Switch off unnecessary TF warning messages
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

class Model():
    def __init__(self):
        self.optimizer = keras.optimizers.Adam()
        self.initializer = tf.keras.initializers.HeNormal()
        self.batchSize = 500
        self.number_epochs=5000
        
    def NN(self,num_layers = 3, num_neurons = 30):
        model_ = keras.models.Sequential()
        model_.add(keras.layers.Dense(num_neurons,activation='tanh',input_dim=3,kernel_initializer = self.initializer))
        for layer in range(num_layers-1):
            model_.add(keras.layers.Dense(num_neurons,activation='tanh',kernel_initializer=self.initializer))
        model_.add(keras.layers.Dense(1,kernel_initializer=self.initializer))
        return model_
    
    def solve_pde(self,value_function,X,idx):
        z,f,t = X[:,0:1],X[:,1:2],X[:,2:3]
        with tf.GradientTape(persistent=True) as tape:
            u = value_function(tf.concat([z,f,t],axis=1))
        u_z = tape.gradient(u,z,unconnected_gradients=tf.UnconnectedGradients.ZERO)
        u_zz = tape.gradient(u_z,z,unconnected_gradients=tf.UnconnectedGradients.ZERO)
        u_t = tape.gradient(u,t)
        u_pde = u_t +   u_z  +  u_zz - tf.cast(0.5,dtype=tf.float32) * u
        return u_pde
    
    def loss_function(self,batchSize):
        z = tf.linspace(0.001,0.999, 200)
        f = tf.linspace(0.1,0.2, 20)
        z_tile = tf.tile(tf.expand_dims(z,axis=-1),multiples=[20,1])
        f_tile = tf.reshape(tf.repeat(f,200),[-1,1])
        dt = 0.9
        X=tf.concat((z_tile,f_tile,tf.reshape(tf.repeat(dt,z_tile.shape[0]),[-1,1])),axis=1)
        X_pde = tf.concat((z_tile,f_tile,tf.random.uniform(shape=(z_tile.shape[0],1),minval=0,maxval=dt)),axis=1)
        x_star = tf.concat((z_tile,f_tile,tf.reshape(tf.repeat(0.0,z_tile.shape[0]),[-1,1])),axis=1)
        idx = np.random.choice(X.shape[0],batchSize,replace=True)
        loss_e = self.solve_pde(self.value_function_e,X_pde,idx)
        self.value_updated = self.value_function_e(tf.concat([x_star[:,0:1],x_star[:,1:2],x_star[:,2:3]],axis=1)).numpy().reshape(self.innerStep.Nz,self.innerStep.Nf).transpose()
        return loss_e
    
    @tf.function 
    def training_step(self):
        with tf.GradientTape(persistent=True) as tape:
            loss_e = self.loss_function(self.batchSize)
        grads_valueE = tape.gradient(loss_e,self.theta_valueFunction_e)
        self.optimizer.apply_gradients(zip(grads_valueE,self.theta_valueFunction_e))
        return loss_e
    
    def train_model(self):
        self.value_function_e = self.NN()

        self.theta_valueFunction_e = self.value_function_e.trainable_variables
        
        self.LVF = []

        for epoch in trange(self.number_epochs):
            print(epoch)
            loss_e = self.training_step()
            self.LVF.append(loss_e.numpy())
            
        
if __name__=="__main__":
    ext = Model()
    ext.train_model()    
    

The error message with the full traceback is

Traceback (most recent call last):

  File "<ipython-input-26-f5a127c3c9ae>", line 1, in <module>
    runfile('C:/Users/user/Google Drive/S/Research Project4/trial.py', wdir='C:/Users/user/Google Drive/SFI/Research Project4')

  File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 827, in runfile
    execfile(filename, namespace)

  File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 110, in execfile
    exec(compile(f.read(), filename, 'exec'), namespace)

  File "C:/Users/user/Google Drive/SFI/Research Project4/trial.py", line 85, in <module>
    ext.train_model()

  File "C:/Users/user/Google Drive/SFI/Research Project4/trial.py", line 79, in train_model
    loss_e = self.training_step()

  File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\eager\def_function.py", line 862, in __call__
    return self._python_function(*args, **kwds)

  File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\eager\function.py", line 3985, in bound_method_wrapper
    return wrapped_fn(weak_instance(), *args, **kwargs)

  File "C:/Users/user/Google Drive/SFI/Research Project4/trial.py", line 65, in training_step
    loss_e = self.loss_function(self.batchSize)

  File "C:/Users/user/Google Drive/SFI/Research Project4/trial.py", line 58, in loss_function
    loss_e = self.solve_pde(self.value_function_e,X_pde,idx)

  File "C:/Users/user/Google Drive/SFI/Research Project4/trial.py", line 34, in solve_pde
    u_pde = u_t +   u_z  +  u_zz - tf.cast(0.5,dtype=tf.float32) * u

  File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\ops\math_ops.py", line 1399, in r_binary_op_wrapper
    y, x = maybe_promote_tensors(y, x)

  File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\ops\math_ops.py", line 1335, in maybe_promote_tensors
    ops.convert_to_tensor(tensor, dtype, name="x"))

  File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\profiler\trace.py", line 163, in wrapped
    return func(*args, **kwargs)

  File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 1566, in convert_to_tensor
    ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)

  File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\framework\constant_op.py", line 346, in _constant_tensor_conversion_function
    return constant(v, dtype=dtype, name=name)

  File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\framework\constant_op.py", line 272, in constant
    allow_broadcast=True)

  File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\framework\constant_op.py", line 283, in _constant_impl
    return _constant_eager_impl(ctx, value, dtype, shape, verify_shape)

  File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\framework\constant_op.py", line 308, in _constant_eager_impl
    t = convert_to_eager_tensor(value, ctx, dtype)

  File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\framework\constant_op.py", line 106, in convert_to_eager_tensor
    return ops.EagerTensor(value, ctx.device_name, dtype)

ValueError: Attempt to convert a value (None) with an unsupported type (<class 'NoneType'>) to a Tensor.

Any help is much appreciated. Thank you.

There are two problems in your code that prevent you from getting the result you want.

  1. If you want to compute higher-order derivatives, you have to create nested GradientTape objects.
  2. GradientTape automatically tracks variables inside its context; if you want to track tensors (in your case, you want to track z and t), you have to call tape.watch(<my_tensor>), otherwise you will not get any gradients (see the toy example after this list).
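
As a quick illustration of both points, here is a minimal sketch on a toy function, independent of your model: nested, watched tapes recover the first and second derivatives of y = x**3, namely 3*x**2 and 6*x.

import tensorflow as tf

x = tf.constant(2.0)
with tf.GradientTape() as outer:
    outer.watch(x)                     # watch the plain tensor on the outer tape
    with tf.GradientTape() as inner:
        inner.watch(x)                 # and on the inner tape
        y = x ** 3
    dy_dx = inner.gradient(y, x)       # 3 * x**2 = 12.0 at x = 2
d2y_dx2 = outer.gradient(dy_dx, x)     # 6 * x = 12.0 at x = 2
print(dy_dx.numpy(), d2y_dx2.numpy())  # 12.0 12.0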

Fixed code:

def solve_pde(self, value_function, X, idx):
    z, f, t = X[:, 0:1], X[:, 1:2], X[:, 2:3]
    with tf.GradientTape(persistent=True) as tape:
        # Watch z on the outer tape so the second derivative u_zz can be taken.
        tape.watch(z)
        with tf.GradientTape(persistent=True) as tape2:
            # z and t are plain tensors, not variables, so they must be
            # watched explicitly; otherwise tape2.gradient() returns None.
            tape2.watch(z)
            tape2.watch(t)
            u = value_function(tf.concat([z, f, t], axis=1))
        # First derivative w.r.t. z, taken inside the outer tape's context
        # so that the outer tape records it.
        u_z = tape2.gradient(u, z)
    u_zz = tape.gradient(u_z, z)   # second derivative, from the outer tape
    u_t = tape2.gradient(u, t)     # first time derivative, from the inner tape
    u_pde = u_t + u_z + u_zz - tf.cast(0.5, dtype=tf.float32) * u
    return u_pde
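
Since both tapes are created with persistent=True, they can be queried more than once; when you are done you can del tape and del tape2 to release their resources. As a quick sanity check of the fixed method (a hypothetical snippet, assuming solve_pde in your Model class has been replaced with the version above):

m = Model()
m.value_function_e = m.NN()
X = tf.random.uniform(shape=(4, 3))                    # columns play the roles of z, f, t
u_pde = m.solve_pde(m.value_function_e, X, idx=None)   # idx is unused here
print(u_pde.shape)                                     # (4, 1): one PDE residual per sample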

More about gradient tapes can be found in the official documentation: https://www.tensorflow.org/api_docs/python/tf/GradientTape