定义损失函数以使用外部数组
Defining a loss function such that an external array is used
在我的神经网络 (RNN) 中,我定义了损失函数,以便神经网络的输出用于查找索引(二进制),然后使用索引从数组中提取所需的元素这又将用于计算 MSELoss。
但是,程序给出了 parameter().grad = None
错误,这主要是因为图形在某处中断了。错误函数定义有什么问题
框架:Pytorch
代码如下:
神经网络:
class RNN(nn.Module):
def __init__(self):
super(RNN, self).__init__()
self.hidden_size = 8
# self.input_size = 2
self.h2o = nn.Linear(self.hidden_size, 1)
self.h2h = nn.Linear(self.hidden_size, self.hidden_size)
self.sigmoid = nn.Sigmoid()
def forward(self,hidden):
output = self.h2o(hidden)
output = self.sigmoid(output)
hidden = self.h2h(hidden)
return output, hidden
def init_hidden(self):
return torch.zeros(1, self.hidden_size)
损失函数、训练步骤和训练
rnn = RNN()
criterion = nn.MSELoss()
def loss_function(previous, output, index):
code = 2*(output > 0.5).long()
current = Q_m2[code:code+2, i]
return criterion(current, previous), current
def train_step():
hidden = rnn.init_hidden()
rnn.zero_grad()
# Q_m2.requires_grad = True
# Q_m2.create_graph = True
loss = 0
previous = Q_m[0:2, 0]
for i in range(1, samples):
output, hidden = rnn(hidden)
l, previous = loss_function(previous, output, i)
loss+=l
loss.backward()
# Q_m2.retain_grad()
for p in rnn.parameters():
p.data.add_(p.grad.data, alpha=-0.05)
return output, loss.item()/(samples - 1)
def training(epochs):
running_loss = 0
for i in range(epochs):
output, loss = train_step()
print(f'Epoch Number: {i+1}, Loss: {loss}')
running_loss +=loss
Q_m2
Q_m = np.zeros((4, samples))
for i in range(samples):
Q_m[:,i] = q_x(U_m[:,i])
Q_m = torch.FloatTensor(Q_m)
Q_m2 = Q_m
Q_m2.requires_grad = True
Q_m2.create_graph = True
错误:
<ipython-input-36-feefd257c97a> in train_step()
21 # Q_m2.retain_grad()
22 for p in rnn.parameters():
---> 23 p.data.add_(p.grad.data, alpha=-0.05)
24 return output, loss.item()/(samples - 1)
25
AttributeError: 'NoneType' object has no attribute 'data'
这是 K. Frank at discuss.pytorch.org
向我建议的可能解决方案
As I read it, code is calculated to be either 0 or 2. You could
instead interpret output (processed appropriately, as necessary) to be
the probability that code should be 0 vs. 2, and then use that
probability to form a weighted average of the 0 and 2 entries in your
Q_m2 array.
在我的神经网络 (RNN) 中,我定义了损失函数,以便神经网络的输出用于查找索引(二进制),然后使用索引从数组中提取所需的元素这又将用于计算 MSELoss。
但是,程序给出了 parameter().grad = None
错误,这主要是因为图形在某处中断了。错误函数定义有什么问题
框架:Pytorch
代码如下: 神经网络:
class RNN(nn.Module):
def __init__(self):
super(RNN, self).__init__()
self.hidden_size = 8
# self.input_size = 2
self.h2o = nn.Linear(self.hidden_size, 1)
self.h2h = nn.Linear(self.hidden_size, self.hidden_size)
self.sigmoid = nn.Sigmoid()
def forward(self,hidden):
output = self.h2o(hidden)
output = self.sigmoid(output)
hidden = self.h2h(hidden)
return output, hidden
def init_hidden(self):
return torch.zeros(1, self.hidden_size)
损失函数、训练步骤和训练
rnn = RNN()
criterion = nn.MSELoss()
def loss_function(previous, output, index):
code = 2*(output > 0.5).long()
current = Q_m2[code:code+2, i]
return criterion(current, previous), current
def train_step():
hidden = rnn.init_hidden()
rnn.zero_grad()
# Q_m2.requires_grad = True
# Q_m2.create_graph = True
loss = 0
previous = Q_m[0:2, 0]
for i in range(1, samples):
output, hidden = rnn(hidden)
l, previous = loss_function(previous, output, i)
loss+=l
loss.backward()
# Q_m2.retain_grad()
for p in rnn.parameters():
p.data.add_(p.grad.data, alpha=-0.05)
return output, loss.item()/(samples - 1)
def training(epochs):
running_loss = 0
for i in range(epochs):
output, loss = train_step()
print(f'Epoch Number: {i+1}, Loss: {loss}')
running_loss +=loss
Q_m2
Q_m = np.zeros((4, samples))
for i in range(samples):
Q_m[:,i] = q_x(U_m[:,i])
Q_m = torch.FloatTensor(Q_m)
Q_m2 = Q_m
Q_m2.requires_grad = True
Q_m2.create_graph = True
错误:
<ipython-input-36-feefd257c97a> in train_step()
21 # Q_m2.retain_grad()
22 for p in rnn.parameters():
---> 23 p.data.add_(p.grad.data, alpha=-0.05)
24 return output, loss.item()/(samples - 1)
25
AttributeError: 'NoneType' object has no attribute 'data'
这是 K. Frank at discuss.pytorch.org
向我建议的可能解决方案As I read it, code is calculated to be either 0 or 2. You could instead interpret output (processed appropriately, as necessary) to be the probability that code should be 0 vs. 2, and then use that probability to form a weighted average of the 0 and 2 entries in your Q_m2 array.