mxnet.autograd 线性回归的梯度下降 - 性能问题
Gradient descent for linear regression with mxnet.autograd - performance issues
我使用 mxnet.autograd 为线性回归实现了一个简单的梯度下降算法。
一切正常,但性能很糟糕。我使用的是普通(vanilla)梯度下降法,而不是 SGD,但我认为问题并不在这里……如果直接使用梯度的解析表达式,1000 轮的整个过程大约只需要 2 秒,而使用 autograd 则需要 147 秒。
这是代码的实现
from mxnet import nd, autograd, gluon
import pandas as pd
def main():
    """Fit a linear regression to ``dataset.txt`` with gradient descent.

    Reads the whitespace-separated file ``dataset.txt`` (first row is the
    header), prepends a constant column ``x_0`` of ones, uses the last
    column as the target and all preceding columns as inputs, runs
    ``GradientDescent`` from an all-zero parameter vector and prints the
    resulting parameters.
    """
    # learning algorithm parameters
    nr_epochs = 1000
    alpha = 0.01

    # read data; the separator is a raw string so that "\s" reaches pandas as
    # a regex class instead of being an invalid string escape sequence
    data = pd.read_csv("dataset.txt", header=0, index_col=None, sep=r"\s+")

    # ---------------------------------
    # --   using gradient descent   ---
    # ---------------------------------
    data.insert(0, "x_0", 1, allow_duplicates=True)  # insert column of "1"s as x_0
    n = data.shape[1] - 1  # number of input columns (x_0 included)

    X = nd.array(data.iloc[:, 0:n].values)  # array with x values
    Y = nd.array(data.iloc[:, -1].values)  # array with y values

    theta = nd.zeros(n)  # initial parameters array
    theta.attach_grad()  # declare gradient with respect to theta is needed

    # ----------------------------------------------------------
    theta, Loss = GradientDescent(X, Y, theta, alpha, nr_epochs)
    # ----------------------------------------------------------

    print("Theta by gradient descent:")
    print(theta)
#--------------#
# END MAIN #
#--------------#
#-------------------#
# loss function #
#-------------------#
def LossFunction(X, Y, theta):
    """Return the halved mean squared error, accumulated row by row.

    For every row ``i`` of ``X`` the prediction ``H(X[i, :], theta)`` is
    compared with ``Y[i]``; the squared residual, scaled by ``1 / (2 * m)``
    with ``m = X.shape[0]``, is added to a running total that starts at the
    integer ``0``. One ``H`` call is issued per row.
    """
    m = X.shape[0]  # number of training samples
    total = 0
    for row in range(m):
        residual = H(X[row, :], theta) - Y[row]
        # plain "total = total + ..." (not "+="), so no array is modified in place
        total = total + (1 / (2 * m)) * residual ** 2
    return total
#----------------#
# hypothesis #
#----------------#
def H(x, theta):
    """Linear hypothesis: the ``nd.dot`` product of ``x`` and ``theta``."""
    prediction = nd.dot(x, theta)
    return prediction
#----------------------#
# gradient descent #
#----------------------#
def GradientDescent(X, Y, theta, alpha, nr_epochs):
    """Run ``nr_epochs`` full-batch gradient-descent steps on ``theta``.

    Each epoch evaluates ``LossFunction(X, Y, theta)`` under
    ``autograd.record()``, back-propagates it, stores the loss, and then
    overwrites ``theta`` in place one component at a time with
    ``theta[j] - alpha * theta.grad[j]``. ``theta.grad`` is read, so the
    caller is expected to have attached a gradient to ``theta`` beforehand.

    Returns:
        The updated ``theta`` and an array with the loss of every epoch.
    """
    loss_history = nd.zeros(nr_epochs)  # loss value observed at each epoch
    for epoch in range(nr_epochs):
        with autograd.record():
            epoch_loss = LossFunction(X, Y, theta)
        epoch_loss.backward()
        loss_history[epoch] = epoch_loss

        # component-wise parameter update, performed outside the recorded scope
        for idx in range(len(theta)):
            step = alpha * theta.grad[idx]
            theta[idx] = theta[idx] - step
    return theta, loss_history
# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()
瓶颈在于下面这一行调用:
theta, Loss = GradientDescent(X, Y, theta, alpha, nr_epochs)
我是不是做错了什么?
我看过其他一些示例,它们的运行速度比我的快得多。有什么地方可以修改,以减少运行时间吗?
谢谢!
问题是您正在遍历数组以更新参数。您应该改用矢量化方法。
from mxnet import nd, autograd, gluon
import pandas as pd
def main():
    """Fit random data with the vectorised gradient descent and print theta.

    Draws a uniform random ``(m, n)`` input matrix and ``(m, 1)`` target,
    starts from an all-zero ``(n, 1)`` parameter column, runs
    ``GradientDescent`` and prints the resulting parameters.
    """
    # learning algorithm parameters
    nr_epochs = 1000
    alpha = 0.01

    m = 10000  # number of samples
    n = 50  # number of features
    X = nd.random.uniform(shape=(m, n))  # array with x values
    Y = nd.random.uniform(shape=(m, 1))  # array with y values
    # X and Y are constant data, not parameters, so no gradient is attached to
    # them; only the gradient with respect to theta is needed.

    theta = nd.zeros((n, 1))  # initial parameters array
    theta.attach_grad()  # declare gradient with respect to theta is needed

    # ----------------------------------------------------------
    theta, Loss = GradientDescent(X, Y, theta, alpha, nr_epochs)
    # ----------------------------------------------------------

    print("Theta by gradient descent:")
    print(theta)
#--------------#
# END MAIN #
#--------------#
#-------------------#
# loss function #
#-------------------#
def LossFunction(X, Y, theta):
    """Return the halved mean squared error of ``nd.dot(X, theta)`` vs ``Y``.

    The whole batch is handled by a single matrix product followed by one
    ``sum()`` reduction; the result is scaled by ``1 / (2 * m)`` where
    ``m = X.shape[0]``.
    """
    scale = 1 / (2 * X.shape[0])  # X.shape[0] is the number of training samples
    residual = nd.dot(X, theta) - Y
    return scale * (residual ** 2).sum()
#----------------------#
# gradient descent #
#----------------------#
def GradientDescent(X, Y, theta, alpha, nr_epochs):
    """Run ``nr_epochs`` full-batch gradient-descent steps starting at ``theta``.

    Args:
        X: input array passed through to ``LossFunction``.
        Y: target array passed through to ``LossFunction``.
        theta: initial parameters. The array itself is not modified; the
            optimisation works on a private copy.
        alpha: learning rate multiplying the gradient in every step.
        nr_epochs: number of update steps to perform.

    Returns:
        The optimised parameters and an array with the loss of every epoch.
    """
    Loss = nd.zeros(nr_epochs)  # array containing values of loss function over iterations

    # Work on a copy so the caller's array keeps its initial values, and attach
    # the gradient once: the update below is in place, so the same parameter
    # array (and its gradient) is reused by every epoch instead of being
    # re-created and re-attached each time.
    theta = theta.copy()
    theta.attach_grad()

    for epoch in range(nr_epochs):
        with autograd.record():
            loss = LossFunction(X, Y, theta)
        loss.backward()
        Loss[epoch] = loss.asnumpy()
        # In-place step, outside the recorded scope.
        theta[:] = theta - alpha * theta.grad
    return theta, Loss
# Run the demo only when this file is executed as a script.
if __name__ == '__main__':
    main()
这个示例在 10000 行、50 个维度的数据上运行,耗时不到 1 秒。
我使用 mxnet.autograd 为线性回归实现了一个简单的梯度下降算法。
一切正常,但性能很糟糕。我使用的是普通(vanilla)梯度下降法,而不是 SGD,但我认为问题并不在这里……如果直接使用梯度的解析表达式,1000 轮的整个过程大约只需要 2 秒,而使用 autograd 则需要 147 秒。
这是代码的实现
from mxnet import nd, autograd, gluon
import pandas as pd
def main():
    """Fit a linear regression to ``dataset.txt`` with gradient descent.

    Reads the whitespace-separated file ``dataset.txt`` (first row is the
    header), prepends a constant column ``x_0`` of ones, uses the last
    column as the target and all preceding columns as inputs, runs
    ``GradientDescent`` from an all-zero parameter vector and prints the
    resulting parameters.
    """
    # learning algorithm parameters
    nr_epochs = 1000
    alpha = 0.01

    # read data; the separator is a raw string so that "\s" reaches pandas as
    # a regex class instead of being an invalid string escape sequence
    data = pd.read_csv("dataset.txt", header=0, index_col=None, sep=r"\s+")

    # ---------------------------------
    # --   using gradient descent   ---
    # ---------------------------------
    data.insert(0, "x_0", 1, allow_duplicates=True)  # insert column of "1"s as x_0
    n = data.shape[1] - 1  # number of input columns (x_0 included)

    X = nd.array(data.iloc[:, 0:n].values)  # array with x values
    Y = nd.array(data.iloc[:, -1].values)  # array with y values

    theta = nd.zeros(n)  # initial parameters array
    theta.attach_grad()  # declare gradient with respect to theta is needed

    # ----------------------------------------------------------
    theta, Loss = GradientDescent(X, Y, theta, alpha, nr_epochs)
    # ----------------------------------------------------------

    print("Theta by gradient descent:")
    print(theta)
#--------------#
# END MAIN #
#--------------#
#-------------------#
# loss function #
#-------------------#
def LossFunction(X, Y, theta):
    """Return the halved mean squared error, accumulated row by row.

    For every row ``i`` of ``X`` the prediction ``H(X[i, :], theta)`` is
    compared with ``Y[i]``; the squared residual, scaled by ``1 / (2 * m)``
    with ``m = X.shape[0]``, is added to a running total that starts at the
    integer ``0``. One ``H`` call is issued per row.
    """
    m = X.shape[0]  # number of training samples
    total = 0
    for row in range(m):
        residual = H(X[row, :], theta) - Y[row]
        # plain "total = total + ..." (not "+="), so no array is modified in place
        total = total + (1 / (2 * m)) * residual ** 2
    return total
#----------------#
# hypothesis #
#----------------#
def H(x, theta):
    """Linear hypothesis: the ``nd.dot`` product of ``x`` and ``theta``."""
    prediction = nd.dot(x, theta)
    return prediction
#----------------------#
# gradient descent #
#----------------------#
def GradientDescent(X, Y, theta, alpha, nr_epochs):
    """Run ``nr_epochs`` full-batch gradient-descent steps on ``theta``.

    Each epoch evaluates ``LossFunction(X, Y, theta)`` under
    ``autograd.record()``, back-propagates it, stores the loss, and then
    overwrites ``theta`` in place one component at a time with
    ``theta[j] - alpha * theta.grad[j]``. ``theta.grad`` is read, so the
    caller is expected to have attached a gradient to ``theta`` beforehand.

    Returns:
        The updated ``theta`` and an array with the loss of every epoch.
    """
    loss_history = nd.zeros(nr_epochs)  # loss value observed at each epoch
    for epoch in range(nr_epochs):
        with autograd.record():
            epoch_loss = LossFunction(X, Y, theta)
        epoch_loss.backward()
        loss_history[epoch] = epoch_loss

        # component-wise parameter update, performed outside the recorded scope
        for idx in range(len(theta)):
            step = alpha * theta.grad[idx]
            theta[idx] = theta[idx] - step
    return theta, loss_history
# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()
瓶颈在于对 theta, Loss = GradientDescent(X, Y, theta, alpha, nr_epochs) 的调用。
我是不是做错了什么?我看过其他一些示例,它们的运行速度比我的快得多。有什么地方可以修改,以减少运行时间吗?谢谢!
问题是您正在遍历数组以更新参数。您应该改用矢量化方法。
from mxnet import nd, autograd, gluon
import pandas as pd
def main():
    """Fit random data with the vectorised gradient descent and print theta.

    Draws a uniform random ``(m, n)`` input matrix and ``(m, 1)`` target,
    starts from an all-zero ``(n, 1)`` parameter column, runs
    ``GradientDescent`` and prints the resulting parameters.
    """
    # learning algorithm parameters
    nr_epochs = 1000
    alpha = 0.01

    m = 10000  # number of samples
    n = 50  # number of features
    X = nd.random.uniform(shape=(m, n))  # array with x values
    Y = nd.random.uniform(shape=(m, 1))  # array with y values
    # X and Y are constant data, not parameters, so no gradient is attached to
    # them; only the gradient with respect to theta is needed.

    theta = nd.zeros((n, 1))  # initial parameters array
    theta.attach_grad()  # declare gradient with respect to theta is needed

    # ----------------------------------------------------------
    theta, Loss = GradientDescent(X, Y, theta, alpha, nr_epochs)
    # ----------------------------------------------------------

    print("Theta by gradient descent:")
    print(theta)
#--------------#
# END MAIN #
#--------------#
#-------------------#
# loss function #
#-------------------#
def LossFunction(X, Y, theta):
    """Return the halved mean squared error of ``nd.dot(X, theta)`` vs ``Y``.

    The whole batch is handled by a single matrix product followed by one
    ``sum()`` reduction; the result is scaled by ``1 / (2 * m)`` where
    ``m = X.shape[0]``.
    """
    scale = 1 / (2 * X.shape[0])  # X.shape[0] is the number of training samples
    residual = nd.dot(X, theta) - Y
    return scale * (residual ** 2).sum()
#----------------------#
# gradient descent #
#----------------------#
def GradientDescent(X, Y, theta, alpha, nr_epochs):
    """Run ``nr_epochs`` full-batch gradient-descent steps starting at ``theta``.

    Args:
        X: input array passed through to ``LossFunction``.
        Y: target array passed through to ``LossFunction``.
        theta: initial parameters. The array itself is not modified; the
            optimisation works on a private copy.
        alpha: learning rate multiplying the gradient in every step.
        nr_epochs: number of update steps to perform.

    Returns:
        The optimised parameters and an array with the loss of every epoch.
    """
    Loss = nd.zeros(nr_epochs)  # array containing values of loss function over iterations

    # Work on a copy so the caller's array keeps its initial values, and attach
    # the gradient once: the update below is in place, so the same parameter
    # array (and its gradient) is reused by every epoch instead of being
    # re-created and re-attached each time.
    theta = theta.copy()
    theta.attach_grad()

    for epoch in range(nr_epochs):
        with autograd.record():
            loss = LossFunction(X, Y, theta)
        loss.backward()
        Loss[epoch] = loss.asnumpy()
        # In-place step, outside the recorded scope.
        theta[:] = theta - alpha * theta.grad
    return theta, Loss
# Run the demo only when this file is executed as a script.
if __name__ == '__main__':
    main()
这个示例在 10000 行、50 个维度的数据上运行,耗时不到 1 秒。