pyTorch 中的爆炸损失

Question

我正在尝试在 pytorch 中训练一个潜在 space 模型。该模型相对简单，只需要我最小化我的损失函数，但我得到了一个奇怪的错误。运行一段时间后，损失突然暴涨。

import numpy as np
import scipy.sparse.csgraph as csg
import torch
from torch.autograd import Variable
import torch.autograd as autograd
import matplotlib.pyplot as plt
%matplotlib inline

def cmdscale(D):
    # Number of points                                                                        
    n = len(D)

    # Centering matrix                                                                        
    H = np.eye(n) - np.ones((n, n))/n

    # YY^T                                                                                    
    B = -H.dot(D**2).dot(H)/2

    # Diagonalize                                                                             
    evals, evecs = np.linalg.eigh(B)

    # Sort by eigenvalue in descending order                                                  
    idx   = np.argsort(evals)[::-1]
    evals = evals[idx]
    evecs = evecs[:,idx]

    # Compute the coordinates using positive-eigenvalued components only                      
    w, = np.where(evals > 0)
    L  = np.diag(np.sqrt(evals[w]))
    V  = evecs[:,w]
    Y  = V.dot(L)

    return Y, evals

Y = np.array([[0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
              [0., 0., 0., 0., 0., 1., 1., 0., 1., 0., 0., 0., 0., 0., 0.],
              [0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
              [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 1., 0.],
              [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0.],
              [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
              [0., 1., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1.],
              [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
              [1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 1.],
              [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
              [0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
              [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 1.],
              [0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0.],
              [0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0.],
              [0., 0., 0., 0., 0., 0., 1., 0., 1., 0., 0., 1., 0., 0., 0.]])

temp = Y[~np.all(Y == 0, axis=1)]
temp = temp[:,~np.all(Y == 0, axis=1)]
Y = temp

n = np.shape(Y)[0]
k = 2

D = csg.shortest_path(Y, directed=True)
Z = cmdscale(D)[0][:,0:k]
Z = Z - Z.mean(axis=0, keepdims=True)

tZ = autograd.Variable(torch.Tensor(Z), requires_grad=True)
B = autograd.Variable(torch.Tensor([0]), requires_grad=True)
tY = torch.autograd.Variable(torch.Tensor(Y), requires_grad=False)

#calculating pairwise euclidean distance
def distMatrix(m):
    n = m.size(0)
    d = m.size(1)
    x = m.unsqueeze(1).expand(n, n, d)
    y = m.unsqueeze(0).expand(n, n, d)
    return torch.sqrt(torch.pow(x - y, 2).sum(2) + 1e-4)

def loss(tY):
    d = -distMatrix(tZ)+B
    sigmoidD = torch.sigmoid(d)
    #removing diagonal
    reduce = tY*torch.log(sigmoidD)+(1-tY)*torch.log(1-sigmoidD)
    reduce[torch.eye(n).byte()] = 0
    return -reduce.sum()

losses = []
learning_rate = 1e-4
l = loss(tY)
stepSize = 1000

for i in range(stepSize):
    l.backward(retain_graph=True)
    losses.append(float(loss(tY)))
    tZ.data = tZ.data - learning_rate * tZ.grad.data
    B.data = B.data - learning_rate * B.grad.data

    tZ.grad.data.zero_()
    B.grad.data.zero_()

plt.subplot(122)
plt.plot(losses)
plt.title('Loss')
plt.xlabel('Iteration')
plt.ylabel('loss')

plt.show()

损失不应该继续下降吗？或者至少收敛到某个点？我一定是做错了什么，我是 pytorch 的新手，任何正确方向的提示或推动都将不胜感激！

Answer 1

问题是我定义了损失

l = loss(tY)

在运行并更新我的梯度的循环之外，我不完全确定为什么它会产生这样的效果，但是将损失函数定义移动到循环内部解决了问题，导致本次损失：

pyTorch 中的爆炸损失

Exploding loss in pyTorch

python

torch

pytorch

loss-function