梯度下降没有收敛到它的最小值
Gradient descent is not converging to it's minima
我知道这个问题被问过很多次了,但我仍然有问题。我选择了一个较小的 alpha 值并执行了大量迭代以找到收敛点,但它不起作用。任何帮助将不胜感激。这是完整的代码。
GradientDescent() 和 Cost() 函数正在计算 m 和 b 值,并且
line() 和 show() 函数仅用于绘制数据。
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import random
def show(x,y):
plt.plot(x,y,"ro")
plt.show()
def line(m,b):
xpoints=np.arange(25)
ypoints=np.zeros(len(xpoints))
for i in range(len(xpoints)):
ypoints[i]=m*xpoints[i]+b
plt.plot(xpoints,ypoints,alpha=0.2)
def cost(xpoints,ypoints,m,b,flag):
pridicted_y=np.zeros(len(xpoints))
error=np.zeros(len(xpoints))
TotalError=0
for i in range(len(xpoints)):
if(flag==0):
pridicted_y[i] = m*xpoints[i]+b
error[i]= pridicted_y[i] - ypoints[i]
if(flag==1):
pridicted_y[i] = m*xpoints[i]+b
error[i]= (pridicted_y[i] - ypoints[i])*xpoints[i]
TotalError=TotalError+error[i]
# plt.plot([xpoints[i],xpoints[i]],[ypoints[i],pridicted_y[i]])
# print(error[i],end=" ")
return TotalError
def GradientDescent(xpoints,ypoints,m,b,alpha):
k=len(xpoints)
M=m
B=b
x=0
for i in range(500):
for j in range(2):
M = m-alpha*(1/k)*cost(xpoints,ypoints,m,b,0)
B = b-alpha*(1/k)*cost(xpoints,ypoints,m,b,1)
m=M
b=B
line(m,b)
if(x==1):
plt.show()
print(i,m,b)
return (m,b)
#random data set
x=np.arange(20)
y = [int(i+random.random()*10) for i in x]
min_x = np.mean(x)
min_y = np.mean(y)
#predetermined variable values
m=0
b=0
alpha=0.001
# cost(x,y,m,b)
m,b=GradientDescent(x,y,m,b,alpha)
plt.plot(min_x,min_y,"ko")
plt.plot(min_x,m*min_x+b,"go")
line(m,b)
show(x,y)
print(m,b)
看来你刚刚犯了一个小错误:在计算梯度的cost
函数中,梯度w.r.tm
被梯度[=21代替了=] b
反之亦然。通过如下更改标志
if(flag==1):#instead of flag==0
pridicted_y[i] = m*xpoints[i]+b
error[i]= pridicted_y[i] - ypoints[i]
if(flag==0):#instead of flag==1
pridicted_y[i] = m*xpoints[i]+b
error[i]= (pridicted_y[i] - ypoints[i])*xpoints[i]
我得到这个结果:
我知道这个问题被问过很多次了,但我仍然有问题。我选择了一个较小的 alpha 值并执行了大量迭代以找到收敛点,但它不起作用。任何帮助将不胜感激。这是完整的代码。 GradientDescent() 和 Cost() 函数正在计算 m 和 b 值,并且 line() 和 show() 函数仅用于绘制数据。
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import random
def show(x,y):
plt.plot(x,y,"ro")
plt.show()
def line(m,b):
xpoints=np.arange(25)
ypoints=np.zeros(len(xpoints))
for i in range(len(xpoints)):
ypoints[i]=m*xpoints[i]+b
plt.plot(xpoints,ypoints,alpha=0.2)
def cost(xpoints,ypoints,m,b,flag):
pridicted_y=np.zeros(len(xpoints))
error=np.zeros(len(xpoints))
TotalError=0
for i in range(len(xpoints)):
if(flag==0):
pridicted_y[i] = m*xpoints[i]+b
error[i]= pridicted_y[i] - ypoints[i]
if(flag==1):
pridicted_y[i] = m*xpoints[i]+b
error[i]= (pridicted_y[i] - ypoints[i])*xpoints[i]
TotalError=TotalError+error[i]
# plt.plot([xpoints[i],xpoints[i]],[ypoints[i],pridicted_y[i]])
# print(error[i],end=" ")
return TotalError
def GradientDescent(xpoints,ypoints,m,b,alpha):
k=len(xpoints)
M=m
B=b
x=0
for i in range(500):
for j in range(2):
M = m-alpha*(1/k)*cost(xpoints,ypoints,m,b,0)
B = b-alpha*(1/k)*cost(xpoints,ypoints,m,b,1)
m=M
b=B
line(m,b)
if(x==1):
plt.show()
print(i,m,b)
return (m,b)
#random data set
x=np.arange(20)
y = [int(i+random.random()*10) for i in x]
min_x = np.mean(x)
min_y = np.mean(y)
#predetermined variable values
m=0
b=0
alpha=0.001
# cost(x,y,m,b)
m,b=GradientDescent(x,y,m,b,alpha)
plt.plot(min_x,min_y,"ko")
plt.plot(min_x,m*min_x+b,"go")
line(m,b)
show(x,y)
print(m,b)
看来你刚刚犯了一个小错误:在计算梯度的cost
函数中,梯度w.r.tm
被梯度[=21代替了=] b
反之亦然。通过如下更改标志
if(flag==1):#instead of flag==0
pridicted_y[i] = m*xpoints[i]+b
error[i]= pridicted_y[i] - ypoints[i]
if(flag==0):#instead of flag==1
pridicted_y[i] = m*xpoints[i]+b
error[i]= (pridicted_y[i] - ypoints[i])*xpoints[i]
我得到这个结果: