Gradient descent is not converging to its minimum

I know this question has been asked many times, but I'm still stuck. I chose a small alpha value and ran a large number of iterations so it would converge, but it doesn't work. Any help would be appreciated. Here is the full code: the GradientDescent() and cost() functions compute the m and b values, and the line() and show() functions are only used to plot the data.

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import random

def show(x,y):
    plt.plot(x,y,"ro")
    plt.show()

def line(m,b):
    xpoints=np.arange(25)
    ypoints=np.zeros(len(xpoints))

    for i in range(len(xpoints)):
        ypoints[i]=m*xpoints[i]+b

    plt.plot(xpoints,ypoints,alpha=0.2)

def cost(xpoints,ypoints,m,b,flag):
    pridicted_y=np.zeros(len(xpoints))
    error=np.zeros(len(xpoints))
    TotalError=0

    for i in range(len(xpoints)):
        if(flag==0):
            pridicted_y[i] = m*xpoints[i]+b
            error[i]= pridicted_y[i] - ypoints[i]
        if(flag==1):
            pridicted_y[i] = m*xpoints[i]+b
            error[i]= (pridicted_y[i] - ypoints[i])*xpoints[i]
        TotalError=TotalError+error[i]

        # plt.plot([xpoints[i],xpoints[i]],[ypoints[i],pridicted_y[i]])
        # print(error[i],end=" ")
    return TotalError
def GradientDescent(xpoints,ypoints,m,b,alpha):
    k=len(xpoints)
    M=m
    B=b
    x=0
    for i in range(500):
        for j in range(2):
            M = m-alpha*(1/k)*cost(xpoints,ypoints,m,b,0)
            B = b-alpha*(1/k)*cost(xpoints,ypoints,m,b,1)
        m=M
        b=B
        line(m,b)
        if(x==1):
            plt.show()
            print(i,m,b)
    return (m,b)
#random data set
x=np.arange(20)
y = [int(i+random.random()*10) for i in x]
min_x = np.mean(x)
min_y = np.mean(y)


#predetermined variable values
m=0
b=0
alpha=0.001

# cost(x,y,m,b)
m,b=GradientDescent(x,y,m,b,alpha)
plt.plot(min_x,min_y,"ko")
plt.plot(min_x,m*min_x+b,"go")
line(m,b)
show(x,y)
print(m,b)

It looks like you just made a small mistake: in the cost function that computes the gradients, the gradient w.r.t. m and the gradient w.r.t. b are swapped. Change the flags as follows:

    if(flag==1):#instead of flag==0
        pridicted_y[i] = m*xpoints[i]+b
        error[i]= pridicted_y[i] - ypoints[i]
    if(flag==0):#instead of flag==1
        pridicted_y[i] = m*xpoints[i]+b
        error[i]= (pridicted_y[i] - ypoints[i])*xpoints[i]
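For reference, here is a minimal vectorized sketch of the two gradients the flags are meant to select (plain NumPy; the gradients function and its names are only illustrative, not part of the original code). The point is that the m-update must use the x-weighted errors and the b-update the plain errors:

    import numpy as np

    def gradients(x, y, m, b):
        # x, y are NumPy arrays; returns the gradients of the mean squared error
        k = len(x)
        y_hat = m * x + b                             # current predictions
        grad_m = (1 / k) * np.sum((y_hat - y) * x)    # dJ/dm: errors weighted by x
        grad_b = (1 / k) * np.sum(y_hat - y)          # dJ/db: plain errors
        return grad_m, grad_b

    # one update step, matching the corrected flag assignment above:
    # m = m - alpha * grad_m   (the flag==0 path feeds this update)
    # b = b - alpha * grad_b   (the flag==1 path feeds this update)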

With that change, I get this result: