Gradient Descent on a logarithmic decline curve in Python
I want to run gradient descent on a logarithmic decline curve of the form:
y = y0 - a * ln(b + x).
My y0 in this example is 800.
I was trying to do this using the partial derivatives with respect to a and b, but while this apparently minimizes the squared error, it does not converge. I know this is not vectorized, and I may be taking the wrong approach entirely. Am I making any simple mistakes, or is my whole approach to the problem wrong?
import numpy as np

# constants my gradient descent model should find:
a = 4
b = 4

# function to fit on!
def function(x, a, b):
    y0 = 800
    return y0 - a * np.log(b + x)

# Generates data
def gen_data(numpoints):
    a = 4
    b = 4
    x = np.array(range(0, numpoints))
    y = function(x, a, b)
    return x, y

x, y = gen_data(600)

def grad_model(x, y, iterations):
    converged = False
    # length of dataset
    m = len(x)
    # guess a , b
    theta = [0.1, 0.1]
    alpha = 0.001

    # initial error
    e = np.sum((np.square(function(x, theta[0], theta[1])) - y))

    for iteration in range(iterations):
        hypothesis = function(x, theta[0], theta[1])
        loss = hypothesis - y

        # compute partial deritaves to find slope to "fall" into
        theta0_grad = (np.mean(np.sum(-np.log(x + y)))) / (m)
        theta1_grad = (np.mean((((np.log(theta[1] + x)) / theta[0]) - (x*(np.log(theta[1] + x)) / theta[0])))) / (2*m)

        theta0 = theta[0] - (alpha * theta0_grad)
        theta1 = theta[1] - (alpha * theta1_grad)

        theta[1] = theta1
        theta[0] = theta0

        new_e = np.sum(np.square((function(x, theta[0], theta[1])) - y))
        if new_e > e:
            print "AHHHH!"
            print "Iteration: "+ str(iteration)
            break

    print theta
    return theta[0], theta[1]
I found a few bugs in your code. The line
e = np.sum((np.square(function(x, theta[0], theta[1])) - y))
is incorrect and should be replaced with
e = np.sum((np.square(function(x, theta[0], theta[1]) - y)))
The formula for new_e contains the same bug.
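To see why the parenthesis placement matters, here is a tiny illustration (my example, not from the original answer): np.square(pred - y) squares the residuals, whereas np.square(pred) - y squares the predictions first and only then subtracts y, which is not an error measure at all.

import numpy as np
pred = np.array([3.0, 5.0])
y = np.array([2.0, 6.0])
print(np.sum(np.square(pred - y)))   # 2.0  -> sum of squared residuals (what we want)
print(np.sum(np.square(pred) - y))   # 26.0 -> squares the predictions, then subtracts y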
Also, the gradient formulas are wrong. Your loss function is
$L(a,b) = \sum_{i=1}^N \bigl(y_0 - a \log(b + x_i) - y_i\bigr)^2$,
so you have to compute the partial derivatives of $L$ with respect to $a$ and $b$. (Does LaTeX really not work on Stack Overflow?) One last remark: gradient descent only converges if the step size is small enough, so the step size must not be chosen too large. Here is a version of your code that runs better:
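For reference (my own derivation, not spelled out in the original answer), the partial derivatives that the theta0_grad and theta1_grad lines below compute are

$$\frac{\partial L}{\partial a} = \sum_{i=1}^{N} 2\bigl(y_0 - a\log(b + x_i) - y_i\bigr)\bigl(-\log(b + x_i)\bigr),$$
$$\frac{\partial L}{\partial b} = \sum_{i=1}^{N} 2\bigl(y_0 - a\log(b + x_i) - y_i\bigr)\Bigl(-\frac{a}{b + x_i}\Bigr),$$

with $a$ playing the role of theta[0] and $b$ the role of theta[1].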
import numpy as np
import matplotlib.pyplot as plt

# constants my gradient descent model should find:
a = 4.0
b = 4.0
y0 = 800.0

# function to fit on!
def function(x, a, b):
    # y0 = 800
    return y0 - a * np.log(b + x)

# Generates data
def gen_data(numpoints):
    # a = 4
    # b = 4
    x = np.array(range(0, numpoints))
    y = function(x, a, b)
    return x, y

x, y = gen_data(600)

def grad_model(x, y, iterations):
    converged = False
    # length of dataset
    m = len(x)
    # guess a , b
    theta = [0.1, 0.1]
    alpha = 0.00001

    # initial error
    # e = np.sum((np.square(function(x, theta[0], theta[1])) - y))  # This was a bug
    e = np.sum((np.square(function(x, theta[0], theta[1]) - y)))
    costs = np.zeros(iterations)

    for iteration in range(iterations):
        hypothesis = function(x, theta[0], theta[1])
        loss = hypothesis - y

        # compute partial derivatives of the squared-error loss to find the slope to "fall" into
        # theta0_grad = (np.mean(np.sum(-np.log(x + y)))) / (m)
        # theta1_grad = (np.mean((((np.log(theta[1] + x)) / theta[0]) - (x*(np.log(theta[1] + x)) / theta[0])))) / (2*m)
        theta0_grad = 2*np.sum((y0 - theta[0]*np.log(theta[1] + x) - y)*(-np.log(theta[1] + x)))
        # the gradient w.r.t. b must use the current estimate theta[1], not the true b
        theta1_grad = 2*np.sum((y0 - theta[0]*np.log(theta[1] + x) - y)*(-theta[0]/(theta[1] + x)))

        theta0 = theta[0] - (alpha * theta0_grad)
        theta1 = theta[1] - (alpha * theta1_grad)

        theta[1] = theta1
        theta[0] = theta0

        # new_e = np.sum(np.square((function(x, theta[0], theta[1])) - y))  # This was a bug
        new_e = np.sum(np.square((function(x, theta[0], theta[1]) - y)))
        costs[iteration] = new_e

        if new_e > e:
            print "AHHHH!"
            print "Iteration: "+ str(iteration)
            # break

    print theta
    return theta[0], theta[1], costs

(theta0, theta1, costs) = grad_model(x, y, 100000)
plt.semilogy(costs)
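As an optional sanity check (my addition, not part of the original answer), the same curve can be fitted with scipy.optimize.curve_fit and compared against the hand-rolled gradient descent, assuming SciPy is available. The initial guess p0 and the small positive lower bounds (which keep b + x positive inside the logarithm) are my own choices.

import numpy as np
from scipy.optimize import curve_fit

y0 = 800.0

def model(x, a, b):
    return y0 - a * np.log(b + x)

x = np.arange(600, dtype=float)
y = model(x, 4.0, 4.0)           # same synthetic data as gen_data(600)

popt, pcov = curve_fit(model, x, y, p0=[1.0, 1.0],
                       bounds=([1e-6, 1e-6], [np.inf, np.inf]))
print(popt)                      # should be close to [4.0, 4.0]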