使用 mxnet.autograd 实现线性回归的梯度下降 - 性能问题

Gradient descent for linear regression with mxnet.autograd - performance issues

我使用 mxnet.autograd 为线性回归实现了一个简单的梯度下降算法。一切正常,但性能很糟糕。我使用的是普通(vanilla)梯度下降法,而不是 SGD,但我认为这并不是问题所在……如果我直接使用梯度的解析表达式,1000 轮的整个过程大约需要 2 秒,但使用 autograd 则会增加到 147 秒。

这是代码的实现


from mxnet import nd, autograd, gluon
import pandas as pd



def main():
    """Fit a linear regression on ``dataset.txt`` by gradient descent and print theta.

    The file is read as whitespace-separated columns with a header row. The
    last column is used as the target; the columns before it, together with an
    inserted constant column ``x_0``, are used as the inputs.
    """
    # learning algorithm parameters
    nr_epochs = 1000
    alpha = 0.01

    # read data; the separator is a raw string so the regex token is not
    # parsed as an (invalid) string escape sequence
    data = pd.read_csv("dataset.txt", header=0, index_col=None, sep=r"\s+")


    # ---------------------------------
    # --   using gradient descent   ---
    # ---------------------------------
    data.insert(0, "x_0", 1, True)                              # insert column of "1"s as x_0
    n = data.shape[1] - 1                                       # number of input columns (x_0 included)
    X = nd.array(data.iloc[:, 0:n].values)                      # array with x values
    Y = nd.array(data.iloc[:, -1].values)                       # array with y values

    theta = nd.zeros(n)                                         # initial parameters array
    theta.attach_grad()                                         # declare gradient with respect to theta is needed
    # ----------------------------------------------------------
    theta, Loss = GradientDescent(X, Y, theta, alpha, nr_epochs)
    # ----------------------------------------------------------

    print("Theta by gradient descent:")
    print(theta)


#--------------#
#   END MAIN   #
#--------------#



#-------------------#
#   loss function   #
#-------------------#
def LossFunction(X, Y, theta):
    """Return the halved mean squared error of the hypothesis over all samples.

    The loss is accumulated one sample at a time: each row of ``X`` is passed
    through ``H``, and its squared residual against the matching entry of ``Y``
    is added to the running total, scaled by ``1 / (2 * m)``.
    """
    m = X.shape[0]                  # number of training samples
    total = 0
    for row in range(m):
        residual = H(X[row, :], theta) - Y[row]
        total = total + (1 / (2 * m)) * residual ** 2
    return total


#----------------#
#   hypothesis   #
#----------------#
def H(x, theta):
    """Linear hypothesis: the dot product of the inputs ``x`` with ``theta``."""
    prediction = nd.dot(x, theta)
    return prediction



#----------------------#
#   gradient descent   #
#----------------------#
def GradientDescent(X, Y, theta, alpha, nr_epochs):
    """Run ``nr_epochs`` full-batch gradient-descent steps on ``theta``.

    Each epoch records the loss under autograd, back-propagates it, and then
    writes the updated parameters back into ``theta`` one element at a time.
    This function reads ``theta.grad`` but does not call ``attach_grad``
    itself, so the caller has to attach the gradient beforehand.

    Returns:
        A tuple ``(theta, losses)`` where ``losses`` holds the loss value
        recorded at every epoch.
    """
    loss_history = nd.zeros(nr_epochs)      # loss value recorded at each epoch

    for epoch in range(nr_epochs):
        # Record the forward pass so that backward() can fill theta.grad.
        with autograd.record():
            current_loss = LossFunction(X, Y, theta)
        current_loss.backward()
        loss_history[epoch] = current_loss

        # Element-wise update, written back into theta in place.
        for idx in range(len(theta)):
            theta[idx] = theta[idx] - alpha * theta.grad[idx]

    return theta, loss_history




# Run main() only when this file is executed as a script.
if __name__ == "__main__":
    main()


瓶颈在于对以下函数的调用:

theta, Loss = GradientDescent(X, Y, theta, alpha, nr_epochs)

我是不是做错了什么?我看过其他一些示例,它们的运行速度比我的快得多。有什么地方可以修改以减少运行时间吗?谢谢!

问题是您正在遍历数组以更新参数。您应该改用矢量化方法。

from mxnet import nd, autograd, gluon
import pandas as pd


def main():
    """Fit a linear regression to random data by gradient descent and print theta."""
    # learning algorithm parameters
    nr_epochs = 1000
    alpha = 0.01

    m = 10000                                                   # number of samples
    n = 50                                                      # number of features
    X = nd.random.uniform(shape=(m, n))                         # array with x values
    Y = nd.random.uniform(shape=(m, 1))                         # array with y values
    # X and Y are fixed data, so no gradient is attached to them: backward()
    # then only has to produce the gradient with respect to theta.

    theta = nd.zeros((n, 1))                                    # initial parameters array
    theta.attach_grad()                                         # declare gradient with respect to theta is needed
    # ----------------------------------------------------------
    theta, Loss = GradientDescent(X, Y, theta, alpha, nr_epochs)
    # ----------------------------------------------------------

    print("Theta by gradient descent:")
    print(theta)


#--------------#
#   END MAIN   #
#--------------#



#-------------------#
#   loss function   #
#-------------------#
def LossFunction(X, Y, theta):
    """Return the halved mean squared error, computed with whole-array operations.

    The residuals for all samples come from a single ``nd.dot`` call, are
    squared and summed, and the sum is scaled by ``1 / (2 * m)``.
    """
    m = X.shape[0]                  # number of training samples
    scale = 1 / (2 * m)
    residuals = nd.dot(X, theta) - Y
    squared_error = (residuals ** 2).sum()
    return scale * squared_error



#----------------------#
#   gradient descent   #
#----------------------#
def GradientDescent(X, Y, theta, alpha, nr_epochs):
    """Run ``nr_epochs`` full-batch gradient-descent steps using whole-array updates.

    Returns:
        A tuple ``(theta, losses)`` where ``theta`` is the array produced by
        the last update and ``losses`` holds the loss value recorded at every
        epoch.
    """
    loss_history = nd.zeros(nr_epochs)      # loss value recorded at each epoch

    for epoch in range(nr_epochs):
        # theta is rebound to a new array at the end of every epoch, so a
        # gradient has to be attached again before recording.
        theta.attach_grad()
        with autograd.record():
            current_loss = LossFunction(X, Y, theta)
        current_loss.backward()
        loss_history[epoch] = current_loss.asnumpy()

        # Update all parameters at once instead of looping over elements.
        step = alpha * theta.grad
        theta = theta - step

    return theta, loss_history

# Run main() only when this file is executed as a script.
if __name__ == '__main__':
    main()

这个示例处理 10000 行、50 个维度的数据,运行时间在 1 秒以内。