Why doesn't my batch gradient descent for linear regression converge?
I want to write a linear regression model similar to sklearn.linear_model.LinearRegression. First, I use sklearn.linear_model.LinearRegression to train a standard linear regression model:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
# training data
train_x = np.array([1,2,3,4,5,6], dtype=np.float64).reshape(6,1)
train_y = np.array([1,2,3,3.25,3.5,3.8], dtype=np.float64)
# test data
predict_x = np.arange(0, 7, 0.1)
predict_x = predict_x.reshape(predict_x.size, 1)
# Simple regression
model1 = linear_model.LinearRegression()
model1.fit(train_x, train_y)
print(model1.coef_, model1.intercept_)
# Quadratic regression
model2 = linear_model.LinearRegression()
model2.fit(np.concatenate((train_x, train_x**2), axis=1), train_y)
print(model2.coef_, model2.intercept_)
# Five-order polynomial regression
model5 = linear_model.LinearRegression()
model5.fit(np.concatenate((train_x, train_x**2, train_x**3, train_x**4, train_x**5), axis=1), train_y)
print(model5.coef_, model5.intercept_)
# Predict
predict_y1 = model1.predict(predict_x)
predict_y2 = model2.predict(np.concatenate((predict_x, predict_x**2), axis=1))
predict_y5 = model5.predict(np.concatenate((predict_x, predict_x**2, predict_x**3, predict_x**4, predict_x**5), axis=1))
# plot
plt.figure(figsize = (10,10))
plt.scatter(train_x, train_y, color='black')
plt.plot(predict_x, predict_y1, color='blue', label='underfitting')
plt.plot(predict_x, predict_y2, color='green', label='fair')
plt.plot(predict_x, predict_y5, color='red', label='overfitting')
plt.axis([0,7,0,5])
plt.legend(loc=2)
plt.show()
Then I got good results:
[ 0.53571429] 0.883333333333
[ 1.34821429 -0.11607143] -0.2
[-8.52333333 7.0625 -2.30833333 0.3375 -0.01833333] 4.45
After that, I implemented my own model, MyLinearRegression. To begin with, I chose batch gradient descent with a fixed number of iterations, to test whether my code is correct.
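For reference, the update my fit method performs on the centered data is the standard batch gradient descent step, with the intercept recovered from the column means at the end (note that in my code learning_rate is stored as a negative number, so the addition in the code implements the subtraction below):

w \leftarrow w - \alpha\, X^{\top}(Xw - y), \qquad b = \bar{y} - w^{\top}\bar{x}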
# center data
def center_matrix(X):
    assert isinstance(X, np.ndarray)
    X_offset = np.average(X, axis=0)
    return X - X_offset, X_offset

class MyLinearRegression(object):
    def __init__(self):
        self.coef_ = None
        self.intercept_ = None
        self.learning_rate = None

    def fit(self, X, y):
        n_samples, n_features = X.shape
        n_samples_, = y.shape
        assert n_samples == n_samples_
        # work on centered data; the intercept is recovered afterwards
        X, X_offset = center_matrix(X)
        y, y_offset = center_matrix(y)
        self.coef_ = np.ones((n_features,), dtype=np.float64)
        self.learning_rate = -0.0001  # negative, so the "+" below steps downhill
        error = None
        # using a fixed number of iterations
        for epoch in np.arange(500000):
            y_hat = X.dot(self.coef_)
            error_ = y_hat - y
            # if the squared error is increasing, halve the learning rate
            if error is not None and np.sum(error_**2) > np.sum(error**2):
                self.learning_rate /= 2.
                continue
            error = error_
            coef = self.coef_ + self.learning_rate * (X.T.dot(error))
            # if overflow happens, halve the learning rate
            if np.isfinite(coef).all():
                self.coef_ = coef
            else:
                self.learning_rate /= 2.
        self.intercept_ = y_offset - self.coef_.dot(X_offset.T)
        return self

    def predict(self, X):
        n_samples, n_features = X.shape
        assert n_features == self.coef_.size
        return X.dot(self.coef_) + self.intercept_
# Simple regression
my_model1 = MyLinearRegression()
my_model1.fit(train_x, train_y)
print(my_model1.coef_, my_model1.intercept_)
# Quadratic regression
my_model2 = MyLinearRegression()
my_model2.fit(np.concatenate((train_x, train_x**2), axis=1), train_y)
print(my_model2.coef_, my_model2.intercept_)
# Five-order polynomial regression
my_model5 = MyLinearRegression()
my_model5.fit(np.concatenate((train_x, train_x**2, train_x**3, train_x**4, train_x**5), axis=1), train_y)
print(my_model5.coef_, my_model5.intercept_)
# predict
my_predict_y1 = my_model1.predict(predict_x)
my_predict_y2 = my_model2.predict(np.concatenate((predict_x, predict_x**2), axis=1))
my_predict_y5 = my_model5.predict(np.concatenate((predict_x, predict_x**2, predict_x**3, predict_x**4, predict_x**5), axis=1))
# plot
plt.figure(figsize = (10,10))
plt.scatter(train_x, train_y, color='black')
plt.plot(predict_x, my_predict_y1, color='blue', label='underfitting')
plt.plot(predict_x, my_predict_y2, color='green', label='fair')
plt.plot(predict_x, my_predict_y5, color='red', label='overfitting')
plt.axis([0,7,0,5])
plt.legend(loc=2)
plt.show()
Then I got bad results:
[ 0.53571433] 0.883333191266
[ 1.34821275 -0.11607122] -0.199997815791
[ -1.95681250e+00 -2.20847875e+01 -1.48602362e+02 -9.20144807e+02
-5.56577136e+03] 11678151.1386
With MyLinearRegression I can get good results for my_model1 and my_model2; they are close to the results from sklearn.linear_model.LinearRegression. However, no matter how I tune the learning_rate and the number of iterations, my_model5 just doesn't converge. Can anyone help?
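One observation that may be relevant: the columns of the fifth-order design matrix span several orders of magnitude (x goes up to 6, but x**5 goes up to 7776), which makes the least-squares problem badly conditioned for plain gradient descent. A minimal sketch of a diagnostic one could run to see this (illustrative only, not part of my model code):

import numpy as np

train_x = np.array([1, 2, 3, 4, 5, 6], dtype=np.float64).reshape(6, 1)
# same fifth-order design matrix construction as above
X5 = np.concatenate((train_x, train_x**2, train_x**3, train_x**4, train_x**5), axis=1)
X5_centered = X5 - X5.mean(axis=0)
print(X5.max(axis=0))               # column scales: 6, 36, 216, 1296, 7776
print(np.linalg.cond(X5_centered))  # 2-norm condition number of the centered matrix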