ValueError: Tape is still recording, This can happen if you try to re-enter an already-active tape
I wrote some TensorFlow code for deep successor representation (DSQ) reinforcement learning:
class RL_Brain():
    def __init__(self, n_features, n_action, memory_size=10, batch_size=32, gamma=0.9, phi_size=15):
        self.n_features = n_features
        self.n_actions = n_action
        self.memory_size = memory_size
        self.replay_buffer = np.zeros((self.memory_size, n_features * 2 + 2), np.float)
        self.count = 0
        self.batch_size = batch_size
        self.gamma = gamma
        self.phi_size = phi_size
        self.epsilon = 0.9  # 0.1 exploration rate by default
        self.model, self.mus_model = self.build_model()
        self.opt = Adam()

    def build_model(self):
        input_state = Input(shape=(self.n_features,), name='input')
        input_phi = Input(shape=(self.phi_size,), name='input_phi')
        layer1 = Dense(32, 'relu', name='encode/layer1')(input_state)
        layer2 = Dense(32, 'relu', name='encode/layer2')(layer1)
        layer3 = Dense(10, 'relu', name='encode/layer3')(layer2)
        phi = Dense(15, 'relu', name='phi')(layer3)
        decoder1 = Dense(10, 'relu', name='decode/layer1')(phi)
        decoder2 = Dense(32, 'relu', name='decode/layer2')(decoder1)
        decoder3 = Dense(32, 'relu', name='decode/layer3')(decoder2)
        s_hat = Dense(self.n_features, name='output_s_hat')(decoder3)
        stop_grad_phi = tf.stop_gradient(phi)
        R = Dense(1, name='R', use_bias=False)(stop_grad_phi)
        mus = []
        for i in range(self.n_actions):
            mu = Dense(10, 'relu', name='mu/m%s/layer1' % i)(input_phi)
            mu = Dense(10, 'relu', name='mu/m%s/layer2' % i)(mu)
            mu = Dense(15, 'relu', name='mu/m%s/layer3' % i)(mu)
            m = Model(inputs=input_phi, outputs=mu)
            mus.append(m)
        outputs = [phi, R, s_hat]
        model = Model(inputs=input_state, outputs=outputs)
        return model, mus
    def learn(self):
        # choices = np.random.choice(self.count if self.count < self.memory_size else self.memory_size, self.batch_size, replace=True)
        states = np.expand_dims(self.replay_buffer[(self.count-1) % self.memory_size, :self.n_features], 0)
        states_ = np.expand_dims(self.replay_buffer[(self.count-1) % self.memory_size, -self.n_features:], 0)
        r = np.expand_dims(self.replay_buffer[(self.count-1) % self.memory_size, self.n_features + 1], 0)
        a = self.replay_buffer[(self.count-1) % self.memory_size, self.n_features]
        o_phi_t, o_r, o_s_hat = self.model(states)  # phi, reward, s_hat output by the model
        print(o_r)

        # Training auto-encoder loss and reward loss.
        with tf.GradientTape() as tape:
            loss1 = tf.keras.losses.mean_squared_error(states, self.model(states)[2])
            loss2 = tf.keras.losses.mean_squared_error(r, self.model(states)[1])
            loss = loss1 + loss2
----->      self.opt.minimize(loss, self.model.trainable_variables, tape=tape)

        o_phi_t_, _, __ = self.model(states_)
        mus_ = tf.squeeze(tf.stack([self.mus_model[i](o_phi_t_) for i in range(self.n_actions)]))
        w = tf.Variable(self.model.get_layer('R').get_weights()[0])
        q = tf.matmul(mus_, w)
        max_q_action_index = tf.argmax(tf.squeeze(q)).numpy()

        # Training M loss
        # =========
        with tf.GradientTape() as tape:
            loss = tf.keras.losses.mean_squared_error(o_phi_t + self.gamma * mus_[max_q_action_index],
                                                      self.mus_model[max_q_action_index](o_phi_t))
----->      self.opt.minimize(loss, self.mus_model[action_index].trainable_variables, tape=tape)
When I run the learn function, I get the following error:
Traceback (most recent call last):
  File "/Users/wangheng/workspace/pycharmworkspace/MLAlgorithm/reinforcement_learning/SR/dsr_brain_keras.py", line 67, in learn
    self.opt.minimize(loss, self.model.trainable_variables, tape=tape)
  File "/Users/wangheng/app/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py", line 496, in minimize
    grads_and_vars = self._compute_gradients(
  File "/Users/wangheng/app/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py", line 543, in _compute_gradients
    with tape:
  File "/Users/wangheng/app/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/backprop.py", line 858, in __enter__
    self._push_tape()
  File "/Users/wangheng/app/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/backprop.py", line 869, in _push_tape
    raise ValueError("Tape is still recording, This can happen if you try to "
ValueError: Tape is still recording, This can happen if you try to re-enter an already-active tape.
I suspect something is going wrong in self.opt.minimize(), but I don't know how to fix it.
The call to the optimizer must be made outside the scope of the gradient tape. As the traceback shows, minimize(..., tape=tape) re-enters the tape internally (with tape:), which fails while the tape is still recording inside the with block. In other words:
with tf.GradientTape() as tape:
    loss1 = tf.keras.losses.mean_squared_error(states, self.model(states)[2])
    loss2 = tf.keras.losses.mean_squared_error(r, self.model(states)[1])
    loss = loss1 + loss2
self.opt.minimize(loss, self.model.trainable_variables, tape=tape)
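The same pattern applies to the second tape in learn(): build the loss while the tape is recording, close the with block, and only then call the optimizer. The sketch below is untested and assumes that action_index in the marked line was meant to be max_q_action_index (no action_index is defined anywhere in learn()):

# Training M loss: compute the loss inside the tape, step the optimizer outside it.
with tf.GradientTape() as tape:
    # Bootstrapped successor-feature target; o_phi_t and mus_ were computed
    # outside the tape, so they act as constants here.
    target = o_phi_t + self.gamma * mus_[max_q_action_index]
    pred = self.mus_model[max_q_action_index](o_phi_t)
    loss = tf.keras.losses.mean_squared_error(target, pred)
# The tape has stopped recording at this point, so minimize() can safely
# re-enter it to compute gradients for the mu-network of the greedy action.
self.opt.minimize(loss, self.mus_model[max_q_action_index].trainable_variables, tape=tape)

Equivalently, you can drop the tape= argument and do the update yourself after the block, e.g. grads = tape.gradient(loss, variables) followed by self.opt.apply_gradients(zip(grads, variables)); either way the optimizer call stays outside the with block.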