Gradient descent isn't working for maximum likelihood with logistic probability in python
So I've been trying to run a gradient-based algorithm in Python, but I'm not getting a convergent result. Here is what I'm putting into the code:
#base packages
#import sympy as sp
#from sympy import *
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression

x = np.array([0,0,0,0.1,0.1,0.3,0.3,0.9,0.9,0.9])
y = np.array([0.,0.,1.,0.,1.,1.,1.,0.,1.,1.])

def f(b0,b1,x,y):
    vec = [y[i]*np.log(1/(1+np.exp(-b0-b1*x[i]))) + (1-y[i])*np.log(1 - (1/(1+np.exp(-b0-b1*x[i])))) for i in range(len(y))]
    return sum(vec)

def dervf0(b0,b1,x,y):
    vec = [-y[i] + (1/(1+np.exp(-b0-b1*x[i]))) for i in range(len(x))]
    return np.sum(vec)

def dervf1(b0,b1,x,y):
    vec = [-x[i]*(y[i]-(1/(1+np.exp(-b0-b1*x[i])))) for i in range(len(x))]
    return sum(vec)

def G(f1,f2,b0,b1,x,y,tol,maxiter):
    v = np.array([b0,b1])
    theta_new = v
    for i in range(maxiter):
        theta_new = v - 0.001*np.array([f1(b0,b1,x,y),f2(b0,b1,x,y)])
        if np.linalg.norm(theta_new - v) < tol:
            break
        else:
            v = theta_new
    return theta_new,i
The result should be the vector [-0.009, 1.263]'. Why am I not getting a convergent result? Any ideas?
I don't see why you need to pass f1 and f2 in as arguments at all.
The problem is that you are not using the updated parameters b0, b1 in the next iteration. You are updating v, but never b0, b1.
Add this at the start of each iteration:
b0 = v[0]
b1 = v[1]
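Applied to the original G, that fix looks like this (a minimal sketch; only the first two lines inside the loop are new, everything else is unchanged):

def G(f1,f2,b0,b1,x,y,tol,maxiter):
    v = np.array([b0,b1])
    theta_new = v
    for i in range(maxiter):
        b0 = v[0]  # refresh the parameters from the current iterate,
        b1 = v[1]  # so the gradient is evaluated at v, not at the start point
        theta_new = v - 0.001*np.array([f1(b0,b1,x,y),f2(b0,b1,x,y)])
        if np.linalg.norm(theta_new - v) < tol:
            break
        else:
            v = theta_new
    return theta_new,i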
Better yet, try this vectorized implementation; it runs faster. The final theta_new is [-0.00923525 1.26245957].
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression

x = np.array([0, 0, 0, 0.1, 0.1, 0.3, 0.3, 0.9, 0.9, 0.9])
y = np.array([0., 0., 1., 0., 1., 1., 1., 0., 1., 1.])

def f(b0, b1, x, y):
    return np.sum(
        np.multiply(y, np.log(1 / (1 + np.exp(-b0 - b1 * x)))) +
        np.multiply(1 - y, np.log(1 - (1 / (1 + np.exp(-b0 - b1 * x))))))

def dervf0(b0, b1, x, y):
    return np.sum(-1 * y + (1 / (1 + np.exp(-b0 - b1 * x))))

def dervf1(b0, b1, x, y):
    return np.sum(np.multiply(-1 * x, y - (1 / (1 + np.exp(-b0 - b1 * x)))))

def G(v, x, y, tol, maxiter):
    theta_new = v
    for i in range(maxiter):
        theta_new = v - 0.001 * np.array(
            [dervf0(v[0], v[1], x, y),
             dervf1(v[0], v[1], x, y)])
        if np.linalg.norm(theta_new - v) < tol:
            break
        else:
            v = theta_new
        print('i\t{}\tv\t{}\ttheta_new\t{}'.format(i, v, theta_new))
    return theta_new, i

tol = 0.0000001
maxiter = 1000000
v = np.random.normal(0, 1, 2)
theta_new, i = G(v, x, y, tol, maxiter)
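As a quick sanity check (not part of the original answer), you can compare against scikit-learn's LogisticRegression, which the code already imports. Setting C very large effectively disables the L2 penalty, so the fit approximates plain maximum likelihood:

# Sanity check with scikit-learn; C=1e10 makes regularization negligible,
# so the estimates should be close to the gradient-descent result.
clf = LogisticRegression(C=1e10)
clf.fit(x.reshape(-1, 1), y)      # sklearn expects a 2-D feature matrix
print(clf.intercept_, clf.coef_)  # approximately [-0.009] and [[1.263]]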