Implementing Gradient Descent In Python and receiving an overflow error
I am currently implementing vectorized gradient descent in Python. However, I keep getting an overflow error, even though the numbers in my dataset are not incredibly large. I am using this formula:
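(The formula image from the original post is not reproduced here; reconstructed from the code below, the update rule is the standard batch-gradient step for least-squares regression:

\theta_0 := \theta_0 - \alpha \frac{1}{m} \sum_{i=1}^{m} \bigl( h_\theta(x_i) - y_i \bigr), \qquad \theta_1 := \theta_1 - \alpha \frac{1}{m} \sum_{i=1}^{m} \bigl( h_\theta(x_i) - y_i \bigr) \, x_i

where h_\theta(x) = \theta_0 + \theta_1 x and m is the number of samples.)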
I chose this implementation to avoid taking derivatives by hand. Does anyone have any suggestions on how to fix this, or am I implementing it wrong? Thanks in advance!
Dataset link: https://www.kaggle.com/CooperUnion/anime-recommendations-database/data
## Cleaning Data ##
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

data = pd.read_csv('anime.csv')
# print(data.corr())
# print(data['members'].isnull().values.any())  # Prints False
# print(data['rating'].isnull().values.any())   # Prints True

members = []  # Corresponding fan club size for row
ratings = []  # Corresponding rating for row
for row in data.iterrows():
    if not math.isnan(row[1]['rating']):  # Checks for null ratings
        members.append(row[1]['members'])
        ratings.append(row[1]['rating'])

plt.plot(members, ratings)
plt.savefig('scatterplot.png')

theta0 = 0.3  # Random guess
theta1 = 0.3  # Random guess
error = 0
## Formula ##
def hypothesis(x, theta0, theta1):
    return theta0 + theta1 * x


def costFunction(x, y, theta0, theta1, m):
    loss = 0
    for i in range(m):  # Represents the summation
        loss += (hypothesis(x[i], theta0, theta1) - y[i])**2
    loss *= 1 / (2 * m)  # Represents the 1/2m factor
    return loss


def gradientDescent(x, y, theta0, theta1, alpha, m, iterations=1500):
    for i in range(iterations):
        gradient0 = 0
        gradient1 = 0
        for j in range(m):
            gradient0 += hypothesis(x[j], theta0, theta1) - y[j]
            gradient1 += (hypothesis(x[j], theta0, theta1) - y[j]) * x[j]
        gradient0 *= 1 / m
        gradient1 *= 1 / m
        temp0 = theta0 - alpha * gradient0
        temp1 = theta1 - alpha * gradient1
        theta0 = temp0
        theta1 = temp1
        error = costFunction(x, y, theta0, theta1, len(y))
        print("Error is:", error)
    return theta0, theta1


print(gradientDescent(members, ratings, theta0, theta1, 0.01, len(ratings)))
The error
After a few iterations, my costFunction, called inside my gradientDescent function, gives me an OverflowError: (34, 'Result too large'). I expected my code to keep printing decreasing error values instead.
Error is: 1.7515692852199285e+23
Error is: 2.012089675182454e+38
Error is: 2.3113586742689143e+53
Error is: 2.6551395730578252e+68
Error is: 3.05005286756189e+83
Error is: 3.503703756035943e+98
Error is: 4.024828599077087e+113
Error is: 4.623463163528686e+128
Error is: 5.311135890211131e+143
Error is: 6.101089907410428e+158
Error is: 7.008538065634975e+173
Error is: 8.050955905074458e+188
Error is: 9.248418197694096e+203
Error is: 1.0623985545062037e+219
Error is: 1.220414847696018e+234
Error is: 1.4019337603196565e+249
Error is: 1.6104509643047377e+264
Error is: 1.8499820618048921e+279
Error is: 2.1251399172389593e+294
Traceback (most recent call last):
  File "tyreeGradientDescent.py", line 54, in <module>
    print(gradientDescent(members, ratings, theta0, theta1, 0.01, len(ratings)))
  File "tyreeGradientDescent.py", line 50, in gradientDescent
    error = costFunction(x, y, theta0, theta1, len(y))
  File "tyreeGradientDescent.py", line 33, in costFunction
    loss += (hypothesis(x[i], theta0, theta1) - y[i])**2
OverflowError: (34, 'Result too large')
Your data values are really big, which makes your loss function extremely steep. The result is that you need a tiny alpha unless you normalize your data to smaller values. With too large an alpha, gradient descent jumps all over the place and actually diverges, which is why your error keeps going up instead of down.
With your current data, an alpha of 0.0000000001 will make the error converge. After 30 iterations, my loss went from:
Error is: 66634985.91339202
to
Error is: 16.90452378179708
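The other route mentioned above is to normalize the data. A minimal sketch of that option (my own illustration, not from the original post), reusing gradientDescent and the members/ratings lists from the question:

import numpy as np

members = np.asarray(members, dtype=float)
ratings = np.asarray(ratings, dtype=float)

# Standardize the feature to zero mean and unit variance, so the loss
# surface is no longer extremely steep along the theta1 direction.
mu, sigma = members.mean(), members.std()
members_scaled = (members - mu) / sigma

# With the feature on a small scale, the original alpha = 0.01 converges.
theta0, theta1 = gradientDescent(members_scaled, ratings, 0.3, 0.3,
                                 0.01, len(ratings))

# theta0 and theta1 are now in scaled units: to predict for a raw member
# count x, evaluate hypothesis((x - mu) / sigma, theta0, theta1).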
import numpy as np

X = [0.5, 2.5]
Y = [0.2, 0.9]

def f(w, b, x):  # Sigmoid with parameters w, b
    return 1.0 / (1.0 + np.exp(-(w * x + b)))

def error(w, b):
    err = 0.0
    for x, y in zip(X, Y):
        fx = f(w, b, x)
        err += 0.5 * (fx - y)**2
    return err

def grad_b(w, b, x, y):
    fx = f(w, b, x)
    return (fx - y) * fx * (1 - fx)

def grad_w(w, b, x, y):
    fx = f(w, b, x)
    return (fx - y) * fx * (1 - fx) * x

def do_gradient_descent():
    w, b, eta, max_epochs = 1, 1, 0.01, 100
    for i in range(max_epochs):
        dw, db = 0, 0
        for x, y in zip(X, Y):
            dw += grad_w(w, b, x, y)
            db += grad_b(w, b, x, y)
        w = w - eta * dw
        print(w)
        b = b - eta * db
        print(b)
    er = error(w, b)
    # print(er)
    return er

## Calling Gradient Descent function
do_gradient_descent()
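For contrast, this sigmoid example runs the same batch-update loop on a two-point dataset whose inputs are already small, so eta = 0.01 converges without any rescaling; the extra fx * (1 - fx) factor in grad_w and grad_b is the derivative of the sigmoid applied via the chain rule.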