ValueError: Tape is still recording, This can happen if you try to re-enter an already-active tape

ValueError: Tape is still recording, This can happen if you try to re-enter an already-active tape

我写了一些关于深度后继表示 (DSR) 强化学习的 TensorFlow 代码:

class RL_Brain():
    """Agent that owns a replay buffer, an encoder/decoder/reward Keras model
    and one "mu" Keras model per action.

    NOTE(review): this listing is the code that reproduces the
    "Tape is still recording" ValueError. The `----->` / `---->` prefixes in
    `learn` are not Python; they mark the two `minimize` calls the question
    points at.
    """
    def __init__(self, n_features, n_action, memory_size=10, batch_size=32, gamma=0.9, phi_size=15):
        """Store the hyper-parameters, allocate the replay buffer and build the models.

        Args:
            n_features: width of a state vector (input size of the encoder).
            n_action: number of actions; one mu model is built per action.
            memory_size: number of rows in the replay buffer.
            batch_size: stored on the instance; not read by the code shown here.
            gamma: factor applied to the next-state mu output in `learn`.
            phi_size: width of the `input_phi` placeholder fed to the mu models.
        """
        self.n_features = n_features
        self.n_actions = n_action
        self.memory_size = memory_size
        # One row per transition. `learn` reads the first n_features columns as
        # the state, column n_features as the action, column n_features + 1 as
        # the reward and the last n_features columns as the next state.
        # NOTE(review): `np.float` is a deprecated alias of the builtin `float`
        # and is absent from recent NumPy releases - confirm the NumPy version.
        self.replay_buffer = np.zeros((self.memory_size, n_features * 2 + 2), np.float)
        self.count = 0
        self.batch_size = batch_size
        self.gamma = gamma
        self.phi_size = phi_size
        self.epsilon = 0.9  # by default there is 0.1 randomness
        self.model, self.mus_model = self.build_model()
        self.opt = Adam()

    def build_model(self):
        """Build the main model and the per-action mu models.

        Returns:
            A tuple `(model, mus)`: `model` maps a state to `[phi, R, s_hat]`,
            and `mus` is a list with one `Model` per action, each mapping
            `input_phi` to a 15-unit output.
        """
        input_state = Input(shape=(self.n_features,), name='input')
        input_phi = Input(shape=(self.phi_size,), name='input_phi')

        # Encoder: state -> phi.
        layer1 = Dense(32, 'relu', name='encode/layer1')(input_state)
        layer2 = Dense(32, 'relu', name='encode/layer2')(layer1)
        layer3 = Dense(10, 'relu', name='encode/layer3')(layer2)
        # NOTE(review): the width 15 is hard-coded here (and in mu layer3 below)
        # while `input_phi` uses self.phi_size; they only agree for the default.
        phi = Dense(15, 'relu', name='phi')(layer3)
        # Decoder: phi -> reconstructed state.
        decoder1 = Dense(10, 'relu', name='decode/layer1')(phi)
        decoder2 = Dense(32, 'relu', name='decode/layer2')(decoder1)
        decoder3 = Dense(32, 'relu', name='decode/layer3')(decoder2)
        s_hat = Dense(self.n_features, name='output_s_hat')(decoder3)

        # Reward head: a bias-free linear layer on phi. stop_gradient keeps the
        # reward loss from back-propagating into the encoder.
        stop_grad_phi = tf.stop_gradient(phi)
        R = Dense(1, name='R', use_bias=False)(stop_grad_phi)
        # One independent three-layer network per action, all sharing `input_phi`.
        mus = []
        for i in range(self.n_actions):
            mu = Dense(10, 'relu', name='mu/m%s/layer1' % i)(input_phi)
            mu = Dense(10, 'relu', name='mu/m%s/layer2' % i)(mu)
            mu = Dense(15, 'relu', name='mu/m%s/layer3' % i)(mu)
            m = Model(inputs=input_phi, outputs=mu)
            mus.append(m)

        outputs = [phi, R, s_hat]
        model = Model(inputs=input_state, outputs=outputs)
        return model, mus
    def learn(self):
        """Run one update on the buffer row at index (count - 1) % memory_size.

        First trains the main model on reconstruction + reward loss, then
        trains the mu model of the action with the largest q value.
        """
        # choices = np.random.choice(self.count if self.count < self.memory_size else self.memory_size, self.batch_size, replace=True)
        # Slice a single transition and add a leading batch axis of size 1.
        # Presumably (count - 1) is the most recently stored row - the code
        # that writes the buffer is not shown.
        states = np.expand_dims(self.replay_buffer[(self.count-1) % self.memory_size, :self.n_features], 0)
        states_ = np.expand_dims(self.replay_buffer[(self.count-1) % self.memory_size, -self.n_features:], 0)
        r = np.expand_dims(self.replay_buffer[(self.count-1) % self.memory_size, self.n_features + 1], 0)
        # NOTE(review): `a` is read here but never used below.
        a = self.replay_buffer[(self.count-1) % self.memory_size, self.n_features]
        o_phi_t, o_r, o_s_hat = self.model(states)  # phi, reward and s_hat output by the model
        print(o_r)
        # Training auto-encoder loss and reward loss.
        with tf.GradientTape() as tape:
            loss1 = tf.keras.losses.mean_squared_error(states, self.model(states)[2])
            loss2 = tf.keras.losses.mean_squared_error(r, self.model(states)[1])
            loss = loss1 + loss2
            # The marked call below sits inside the `with` block, so `tape` is
            # still recording when minimize(..., tape=tape) tries to enter it;
            # this is the line named in the traceback.
----->      self.opt.minimize(loss, self.model.trainable_variables, tape=tape)

        # Evaluate every action's mu model on the next state's phi and score
        # each with the weights of the 'R' layer.
        o_phi_t_, _, __ = self.model(states_)
        mus_ = tf.squeeze(tf.stack([self.mus_model[i](o_phi_t_) for i in range(self.n_actions)]))
        w = tf.Variable(self.model.get_layer('R').get_weights()[0])
        q = tf.matmul(mus_, w)
        max_q_action_index = tf.argmax(tf.squeeze(q)).numpy()
        # Training M loss
        # =========
        with tf.GradientTape() as tape:
            # Target: phi of the current state plus gamma times the best
            # action's mu on the next state; prediction: that action's mu on
            # the current phi. Only the prediction is computed under this tape.
            loss = tf.keras.losses.mean_squared_error(o_phi_t + self.gamma * mus_[max_q_action_index],self.mus_model[max_q_action_index](o_phi_t))
            # Same pattern as above: minimize() is called while the tape is
            # still active.
            # NOTE(review): `action_index` is not defined in this method;
            # presumably `max_q_action_index` was intended - confirm.
---->       self.opt.minimize(loss, self.mus_model[action_index].trainable_variables, tape=tape)

当我运行 learn 函数时,出现以下错误:

Traceback (most recent call last):
  File "/Users/wangheng/workspace/pycharmworkspace/MLAlgorithm/reinforcement_learning/SR/dsr_brain_keras.py", line 67, in learn
    self.opt.minimize(loss, self.model.trainable_variables, tape=tape)
  File "/Users/wangheng/app/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py", line 496, in minimize
    grads_and_vars = self._compute_gradients(
  File "/Users/wangheng/app/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py", line 543, in _compute_gradients
    with tape:
  File "/Users/wangheng/app/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/backprop.py", line 858, in __enter__
    self._push_tape()
  File "/Users/wangheng/app/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/backprop.py", line 869, in _push_tape
    raise ValueError("Tape is still recording, This can happen if you try to "
ValueError: Tape is still recording, This can happen if you try to re-enter an already-active tape.

我猜可能是self.opt.minimize()发生了一些错误,但我不知道如何解决。

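对优化器的调用必须放在梯度带(`tf.GradientTape`)的 `with` 代码块之外:从上面的回溯可以看到,`minimize(..., tape=tape)` 内部会再次执行 `with tape:`,如果此时梯度带仍在记录,就会抛出该错误。即: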

# Record the forward passes and both losses while the tape is active.
with tf.GradientTape() as tape:
  loss1 = tf.keras.losses.mean_squared_error(states, self.model(states)[2])
  loss2 = tf.keras.losses.mean_squared_error(r, self.model(states)[1])
  loss = loss1 + loss2
# Dedented on purpose: the `with` block has exited, so the tape is no longer
# recording when minimize() re-enters it to compute the gradients.
self.opt.minimize(loss, self.model.trainable_variables, tape=tape)