Simple Linear Regression not converging
To dig deeper into the math behind machine learning models, I implemented the ordinary least squares algorithm in Python using vectorization. My references are:
- https://github.com/paulaceccon/courses/blob/main/machine_learning_specialization/supervisioned_regression/2_multiple_regression.pdf
- https://www.geeksforgeeks.org/linear-regression-implementation-from-scratch-using-python/
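Concretely, the update I am implementing folds the bias into X as a leading column of ones and follows the squared-error cost J(w) = ||y - Xw||^2, whose gradient is grad J(w) = -2 X^T (y - Xw); this is the -2 * np.dot(X.T, residuals) term in the code below.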
This is what I have so far:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
%matplotlib inline
X, y = datasets.load_diabetes(return_X_y=True)
# We only take the first feature (for visualization purposes).
X = X[:, np.newaxis, 2]
# Split the data into training/testing sets
X_train = X[:-20]
X_test = X[-20:]
y_train = y[:-20]
y_test = y[-20:]
# Input data
sns.scatterplot(
    x=X_train[:, 0],
    y=y_train,
    label="train",
    edgecolor=None,
    color="blue"
)
# To predict
sns.scatterplot(
    x=X_test[:, 0],
    y=y_test,
    label="test",
    edgecolor=None,
    marker="*",
    color="red",
);
class LinearRegression:
    """
    Ordinary least squares Linear Regression.

    Args:
        learning_rate: Step size used for each gradient descent update.
        tolerance: Gradient-magnitude threshold below which training stops.
        standardize: Whether to standardize the features (not implemented yet).
    """

    def __init__(self, learning_rate: float = 0.01, tolerance: float = 1e4, standardize: bool = True):
        # TODO: standardize if required
        self._learning_rate: float = learning_rate
        self._tolerance: float = tolerance
        self._standardize: bool = standardize
        self._fitted: bool = False

    def fit(self, X: np.ndarray, y: np.ndarray) -> None:
        """Fit linear model."""
        self._X: np.ndarray = X
        self._y: np.ndarray = y[:, np.newaxis]
        self._m, self._n = self._X.shape  # rows, features
        self._weights: np.ndarray = np.zeros((self._n, 1))
        self._train()

    def predict(self, X: np.ndarray, add_bias: bool = True) -> np.ndarray:
        """Predict using the linear model."""
        assert self._fitted, "Model not fitted."
        if add_bias:
            X = np.c_[np.ones((X.shape[0], 1)), X]
        predictions = np.dot(X, self._weights)
        return predictions

    def _train(self) -> None:
        """
        Train the linear model on the training data.

        Algorithm:
            1. Initialize weights.
            2. Compute the cost.
            3. Calculate the gradient.
            4. Update weights.
            5. Repeat from 2 until convergence.
        """
        # Add bias term
        self._X = np.c_[np.ones((self._m, 1)), self._X]
        self._weights = np.r_[np.ones((1, 1)), self._weights]
        self._fitted = True

        converged = False
        iterations = 0
        while not converged:
            iterations += 1
            y_hat = self.predict(self._X, add_bias=False)
            residuals = self._residuals(self._y, y_hat)
            gradients = self._gradients(self._X, residuals)
            self._weights -= self._learning_rate * gradients
            gradient_magnitude = np.linalg.norm(gradients)
            print(gradient_magnitude)
            if gradient_magnitude < self._tolerance:
                converged = True
        print(self._weights)
        print(iterations)

    def _residuals(self, y: np.ndarray, y_hat: np.ndarray) -> np.ndarray:
        residuals = y - y_hat
        return residuals

    def _gradients(self, X: np.ndarray, residuals: np.ndarray) -> np.ndarray:
        gradients = -2 * np.dot(X.T, residuals)
        return gradients
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
clf = LinearRegression()
clf.fit(X_train, y_train)
The problem I'm facing is that my weights keep growing until I eventually end up with a bunch of NaNs. I've been trying to figure out what I'm missing, but no luck so far. I also tried tweaking the tolerance threshold, but I don't think that's the issue; rather, something must be wrong with my math.
Your code does actually work fine; it really is just the learning rate! Simply decrease it from 0.01 to, say, 0.0001 and everything works (well, I would also lower the tolerance to something much smaller, like 1e-5, to make sure it actually converges to the correct solution).
A small plot showing that it works:
clf = LinearRegression(learning_rate=0.0001)
clf.fit(X_train, y_train)
b, m = clf._weights[:, 0]
plt.scatter(X_train[:, 0], y_train)
plt.plot([-2, 4], [-2 * m + b, 4 * m + b])
which gives: [plot: training scatter with the fitted regression line through it]
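As a quick sanity check (my own addition, not part of the original answer), you can compare the learned weights with the closed-form least-squares solution from np.linalg.lstsq; once gradient descent has converged with the smaller tolerance, the two should agree closely:

X_b = np.c_[np.ones((X_train.shape[0], 1)), X_train]  # prepend the bias column
w_closed, *_ = np.linalg.lstsq(X_b, y_train, rcond=None)
print(w_closed)            # closed-form [intercept, slope]
print(clf._weights[:, 0])  # gradient descent result; should match closely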
Linear regression is a convex optimization problem, so you can picture it as placing a ball on a parabola and then moving it toward the bottom by a fixed amount of space multiplied by the slope of wherever you currently are. If that "fixed amount" is small enough, you get closer and closer to the bottom until you find the optimum. But if the value is too large, you jump from one side of the parabola to the other, and if it is large enough, you land at a spot that is actually higher than the one you started from. Repeat that a few times and you end up with exactly the situation you ran into...
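To make the picture concrete, here is a minimal sketch (my own illustration; the function name is hypothetical) of gradient descent on f(w) = w**2, whose gradient is 2*w. With a small step the iterate shrinks toward the minimum at w = 0; with a step above 1 it overshoots farther on every iteration and blows up, which is exactly how weights explode into NaNs:

def descend_parabola(lr: float, steps: int = 8) -> list:
    """Gradient descent on f(w) = w**2, starting from w = 1."""
    w = 1.0
    history = [w]
    for _ in range(steps):
        w -= lr * 2 * w  # w <- w - lr * f'(w), with f'(w) = 2w
        history.append(w)
    return history

print(descend_parabola(lr=0.1))  # 1.0, 0.8, 0.64, ... converges toward 0
print(descend_parabola(lr=1.1))  # 1.0, -1.2, 1.44, ... |w| grows each step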