RuntimeError("grad can be implicitly created only for scalar outputs")
I have the following training function for training an A3C agent, and I am getting the error below:
RuntimeError("grad can be implicitly created only for scalar outputs")
at the line (policy_loss + 0.5 * value_loss).backward().
Here is my code. Could someone please help me check what is wrong with it?
def train(rank, params, shared_model, optimizer, ticker):
    torch.manual_seed(params.seed + rank)  # shifting the seed with rank to asynchronize each training agent
    print(ticker)
    try:
        ohlcv = pd.read_csv(ticker + '.csv')
        data = ohlcv.copy()
        data['rsi'] = ab.RSI(data['Close'], 14)
        data['adx'] = ab.ADX(data, 20)
        data = ab.BollBnd(data, 20)
        data['BBr'] = data['Close'] / data['BB_dn']
        data['macd_signal'] = ab.MACD(data)
        data['macd_r'] = data['macd_signal'] / data['Close']
        data['ema20'] = ab.EMA(np.asarray(data['Close']), 20)
        data['ema20_r'] = data['ema20'] / data['Close']
        data['Close'] = data['Close'] / data['Close'].max()
        data = data.iloc[:, [4, 7, 8, 13, 15, 17]]
        data = data.dropna()
        data = torch.DoubleTensor(np.asarray(data))
        env = ENV(state_dim, action_dim, data)
        model = ActorCritic(env.observation_space, env.action_space)
        state = env.reset()
        done = True
        episode_length = 0
        while True:
            episode_length += 1
            model.load_state_dict(shared_model.state_dict())
            if done:
                cx = Variable(torch.zeros(1, state_dim))  # the cell states of the LSTM are reinitialized to zero
                hx = Variable(torch.zeros(1, state_dim))  # the hidden states of the LSTM are reinitialized to zero
            else:
                cx = Variable(cx.data)
                hx = Variable(hx.data)
            values = []
            log_probs = []
            rewards = []
            entropies = []
            for step in range(params.num_steps):
                value, action_values, (hx, cx) = model((Variable(state.unsqueeze(0)), (hx, cx)))
                prob = F.softmax(action_values, -1)
                log_prob = F.log_softmax(action_values, -1)
                entropy = -(log_prob * prob).sum(1)
                entropies.append(entropy)
                action = prob.multinomial(num_samples=action_dim).data
                log_prob = log_prob.gather(1, Variable(action))
                values.append(value)
                log_probs.append(log_prob)
                state, reward, done = env.step(action)
                done = (done or episode_length >= params.max_episode_length)
                reward = max(min(reward, 1), -1)  # clamping the reward between -1 and +1
                if done:
                    episode_length = 0
                    state = env.reset()
                rewards.append(reward)
                if done:
                    break
            R = torch.zeros(1, 1)
            if not done:  # if we are not done:
                value, _, _ = model((Variable(state.unsqueeze(0)), (hx, cx)))
                R = value.data
            values.append(Variable(R))
            policy_loss = torch.zeros(1, 1)
            value_loss = torch.zeros(1, 1)
            R = Variable(R)
            gae = torch.zeros(1, 1)
            for i in reversed(range(len(rewards))):
                R = params.gamma * R + rewards[i]
                advantage = R - values[i]
                print("advantage:", advantage)
                value_loss = value_loss + 0.5 * advantage.pow(2)  # computing the value loss
                TD = rewards[i] + params.gamma * values[i + 1].data - values[i].data  # computing the temporal difference
                gae = gae * params.gamma * params.tau + TD  # gae = sum_i (gamma*tau)^i * TD(i) with gae_i = gae_(i+1)*gamma*tau + (r_i + gamma*V(state_i+1) - V(state_i))
                print("gae:", gae)
                policy_loss = policy_loss - log_probs[i] * Variable(gae) - 0.01 * entropies[i]  # computing the policy loss
                print("policy_loss:", policy_loss)
            optimizer.zero_grad()  # initializing the optimizer
            los = policy_loss + 0.5 * value_loss
            print("los", los.shape)
            (policy_loss + 0.5 * value_loss).backward()
            torch.nn.utils.clip_grad_norm(model.parameters(), 40)  # clipping the gradient norm
            ensure_shared_grads(model, shared_model)
            optimizer.step()  # running the optimization step
    except Exception as e:
        print(e)
        traceback.print_exc()
        var = traceback.format_exc()
Below is the output:
advantage: tensor([[-1.0750]], grad_fn=<ThSubBackward>)
gae: tensor([[-1.0750]])
policy_loss: tensor([[-25.8590, -26.1414, -25.9023, -25.2628]], grad_fn=<ThSubBackward>)
los torch.Size([1, 4])
RuntimeError: grad can be implicitly created only for scalar outputs
PS E:\ML\Breakout_a3c\Code_With_Comments>
The PyTorch error you are getting means "you can only call backward on a scalar, i.e. a 0-dimensional tensor". Here, according to your prints, policy_loss is not a scalar: it is a 1x4 matrix, and so is policy_loss + 0.5 * value_loss. That is why your call to backward raises the error.
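A minimal, self-contained sketch (unrelated to your A3C code) reproduces the same behaviour with any non-scalar tensor:

import torch

x = torch.ones(1, 4, requires_grad=True)
y = x * 2      # y has shape (1, 4), just like your policy_loss
y.backward()   # raises: RuntimeError: grad can be implicitly created only for scalar outputs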
You probably forgot to reduce your loss to a scalar (with functions such as norm or MSELoss, etc.). See an example here.
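For instance, one way to get a scalar loss in your training loop would be to sum each term before calling backward. This is only a sketch: whether summing over that 1x4 dimension is the right reduction for your agent is for you to decide, since it depends on why policy_loss ends up 1x4 in the first place.

# reduce both terms to 0-dimensional tensors, then backpropagate
loss = policy_loss.sum() + 0.5 * value_loss.sum()
print("loss:", loss.shape)   # torch.Size([]) -- a scalar, so backward() is allowed
loss.backward()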
The reason it does not work is the way gradient propagation works internally (it is basically a Jacobian multiplication engine). You can call backward on a non-scalar tensor, but then you have to provide the gradient yourself, e.g.:
# loss is 1x4
loss = policy_loss + 0.5 * value_loss
# explicit gradient backprop with non-scalar tensor
loss.backward(torch.ones(1,4))
You really should not do this without a good understanding of how PyTorch's autograd works and what it implies.
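For completeness, here is a small self-contained sketch (again unrelated to your model) of what that explicit gradient argument computes: backward(v) performs a Jacobian-vector product with v instead of the gradient of a scalar.

import torch

x = torch.ones(1, 4, requires_grad=True)
y = 3 * x                     # y is 1x4; its Jacobian w.r.t. x is 3 times the identity
y.backward(torch.ones(1, 4))  # computes ones(1, 4) times that Jacobian
print(x.grad)                 # tensor([[3., 3., 3., 3.]])

Passing torch.ones(1, 4) here is equivalent to calling y.sum().backward(), which is why reducing the loss to a scalar is usually the clearer option.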
PS: next time, please provide a minimal working example :)