如何为我的线性回归获得正确的斜率变化量?
How do I get the right amount of change to the slope for my linear regression?
我想用 Processing 编写一个线性回归程序,但我搞不清楚应该把哪些量相乘,以及所得结果应该加到斜率上还是从斜率中减去。
我试过改动这些参数(把它们取负、调整学习率)。b 的更新确实有效,但我在让斜率取得正确的值方面遇到了一些问题。
// Sample data: each entry is an {x, y} pair.
float[] P1 = { 100, 100 };
float[] P2 = { 200, 300 };
float[] P3 = { 300, 250 };
float[][] allData = { P1, P2, P3 };

// Parameters of the line y = w1 * x + b, seeded with random start values.
float w1 = random(0, 3);
float b = random(-100, 100);

// Factor applied to every parameter update performed in draw().
float learningRate = 0.01;

// Counts the update steps performed so far.
int i = 0;
// Processing start-up hook: opens a 1000 x 1000 pixel canvas.
void setup() {
  size(1000, 1000);
}
// Processing per-frame hook: clears the canvas, draws the axes and the sample
// points, performs one parameter update (only during the first 10000 calls)
// and finally plots the current line.
void draw() {
  background(255);
  axes();

  //Draw Points
  for (float[] sample : allData) {
    advancedPoint(sample[0], sample[1], color(181, 16, 32), 10);
  }

  //Gradient descend, thats the confusing part...
  // NOTE(review): the x values are in the hundreds, so the slope step
  // (residual * x * learningRate) can become very large — confirm that the
  // learning rate suits the scale of the data.
  if (i < 10000) {
    i += 1;
    float dcost_dreg = 0;
    float dcost_dtar = 0;
    for (float[] point : allData) {
      float yTarget = point[1];
      float yRegression = w1*point[0] + b;
      dcost_dreg += -2*(yRegression-yTarget); //I don't understand these lines
      dcost_dtar += -2*(yRegression-yTarget)*point[0];
    }
    // Both sums are averaged over the number of samples before being applied.
    w1 += learningRate * (dcost_dtar/allData.length);
    b += learningRate * (dcost_dreg/allData.length) ;//until here
  }

  //Draw Regression
  linearPoints(w1, b);
}
// Plots the line y = w1 * x + b as a row of dots, one every 0.25 units of x,
// starting at x = -width and stopping before x = width.
void linearPoints (float w1, float b) {
  float x = -width;
  while (x < width) {
    float y = w1*x + b;
    strokeWeight(3);
    stroke(100, 100);
    // Offset by half the canvas size and negate y to place the dot.
    point(x + width/2, -y + height/2);
    x = x + 0.25;
  }
}
// Draws both coordinate axes through the canvas centre, dot by dot
// (one dot every 0.25 pixels).
void axes() {
  // Vertical axis at x = width/2.
  for (float row = 0; row < height; row = row + 0.25) {
    strokeWeight(1);
    stroke(255, 100, 0);
    point(width/2, row);
  }
  // Horizontal axis at y = height/2; no stroke weight is set in this loop.
  for (float col = 0; col < width; col = col + 0.25) {
    stroke(255, 100, 0);
    point(col, height/2);
  }
}
// Draws a single dot for the data position (x, y) using colour `c` and
// stroke weight `size`. The position is offset by half the canvas size and
// y is negated before drawing.
void advancedPoint(float x, float y, color c, int size) {
  float screenX = x + width/2;
  float screenY = -y + height/2;
  strokeWeight(size);
  stroke(c);
  point(screenX, screenY);
}
理论上,程序应该通过我的数据拟合线性回归。
线性回归基于如下形式的直线方程:
y = w1 * x + b
下面这两个表达式
dcost_dreg += -2*(yRegression-yTarget);
dcost_dtar += -2*(yRegression-yTarget)*point[0];
应该计算线方程与样本点比较的误差,但你的计算是错误的。
常数误差(b 的误差)是样本的 y 坐标与直线方程在该样本 x 坐标处算出的 y 坐标之差。
线性误差(w1 的误差)通过梯度差(即斜率之差)来计算。梯度差是高度与宽度的商(y/x),而不是乘积。
这意味着计算必须是:
dcost_dreg += (yTarget-yRegression);
dcost_dtar += (yTarget-yRegression)/point[0];
表达式
w1 += learningRate * (dcost_dtar/allData.length);
b += learningRate * (dcost_dreg/allData.length);
计算样本的平均误差,并根据学习率对线性方程进行修正。
按如下方式修改函数 draw 即可解决问题:
// Processing per-frame hook: clears the canvas, draws the axes and the sample
// points, applies one correction step to the line y = w1 * x + b (only during
// the first 10000 calls, and only when there is data) and plots the result.
void draw() {
  background(255);
  axes();

  //Draw Points
  for (float[] sample : allData) {
    advancedPoint(sample[0], sample[1], color(181, 16, 32), 10);
  }

  // Correction step. An empty data set is skipped: averaging over zero
  // samples would compute 0/0 and leave w1 and b stuck at NaN.
  // NOTE(review): the least-squares gradient for the slope is the residual
  // multiplied by x; the quotient used here follows the accompanying
  // explanation and may settle on a different line — verify.
  if (i < 10000 && allData.length > 0) {
    i += 1;
    float dcost_dreg = 0;  // summed offset error, drives b
    float dcost_dtar = 0;  // summed slope error, drives w1
    for (float[] point : allData) {
      float yTarget = point[1];
      float yRegression = w1*point[0] + b;
      dcost_dreg += (yTarget-yRegression);
      // A sample at x == 0 would divide by zero and turn w1 into
      // Infinity/NaN for good, so it is left out of the slope error.
      if (point[0] != 0) {
        dcost_dtar += (yTarget-yRegression)/point[0];
      }
    }
    // Apply the mean errors, scaled by the learning rate.
    w1 += learningRate * (dcost_dtar/allData.length);
    b += learningRate * (dcost_dreg/allData.length);
  }

  //Draw Regression
  linearPoints(w1, b);
}
顺便一提,建议使用 line() 来绘制坐标轴和当前的直线:
// Draws the line y = w1 * x + b as one segment between x = -width and
// x = width, offset by half the canvas size with y negated.
void linearPoints (float w1, float b) {
  float leftX = -width;
  float rightX = width;
  float leftY = leftX * w1 + b;
  float rightY = rightX * w1 + b;

  strokeWeight(3);
  stroke(100, 100, 255);
  line(leftX + width/2, -leftY + height/2,
       rightX + width/2, -rightY + height/2);
}
// Draws the two coordinate axes as full-length lines crossing at the
// canvas centre.
void axes() {
  int midX = width/2;
  int midY = height/2;

  strokeWeight(1);
  stroke(255, 100, 0);
  line(0, midY, width, midY);   // horizontal axis
  line(midX, 0, midX, height);  // vertical axis
}
我想用 Processing 编写一个线性回归程序,但我搞不清楚应该把哪些量相乘,以及所得结果应该加到斜率上还是从斜率中减去。
我试过改动这些参数(把它们取负、调整学习率)。b 的更新确实有效,但我在让斜率取得正确的值方面遇到了一些问题。
// Sample data: each entry is an {x, y} pair.
float[] P1 = { 100, 100 };
float[] P2 = { 200, 300 };
float[] P3 = { 300, 250 };
float[][] allData = { P1, P2, P3 };

// Parameters of the line y = w1 * x + b, seeded with random start values.
float w1 = random(0, 3);
float b = random(-100, 100);

// Factor applied to every parameter update performed in draw().
float learningRate = 0.01;

// Counts the update steps performed so far.
int i = 0;
// Processing start-up hook: opens a 1000 x 1000 pixel canvas.
void setup() {
  size(1000, 1000);
}
// Processing per-frame hook: clears the canvas, draws the axes and the sample
// points, performs one parameter update (only during the first 10000 calls)
// and finally plots the current line.
void draw() {
  background(255);
  axes();

  //Draw Points
  for (float[] sample : allData) {
    advancedPoint(sample[0], sample[1], color(181, 16, 32), 10);
  }

  //Gradient descend, thats the confusing part...
  // NOTE(review): the x values are in the hundreds, so the slope step
  // (residual * x * learningRate) can become very large — confirm that the
  // learning rate suits the scale of the data.
  if (i < 10000) {
    i += 1;
    float dcost_dreg = 0;
    float dcost_dtar = 0;
    for (float[] point : allData) {
      float yTarget = point[1];
      float yRegression = w1*point[0] + b;
      dcost_dreg += -2*(yRegression-yTarget); //I don't understand these lines
      dcost_dtar += -2*(yRegression-yTarget)*point[0];
    }
    // Both sums are averaged over the number of samples before being applied.
    w1 += learningRate * (dcost_dtar/allData.length);
    b += learningRate * (dcost_dreg/allData.length) ;//until here
  }

  //Draw Regression
  linearPoints(w1, b);
}
// Plots the line y = w1 * x + b as a row of dots, one every 0.25 units of x,
// starting at x = -width and stopping before x = width.
void linearPoints (float w1, float b) {
  float x = -width;
  while (x < width) {
    float y = w1*x + b;
    strokeWeight(3);
    stroke(100, 100);
    // Offset by half the canvas size and negate y to place the dot.
    point(x + width/2, -y + height/2);
    x = x + 0.25;
  }
}
// Draws both coordinate axes through the canvas centre, dot by dot
// (one dot every 0.25 pixels).
void axes() {
  // Vertical axis at x = width/2.
  for (float row = 0; row < height; row = row + 0.25) {
    strokeWeight(1);
    stroke(255, 100, 0);
    point(width/2, row);
  }
  // Horizontal axis at y = height/2; no stroke weight is set in this loop.
  for (float col = 0; col < width; col = col + 0.25) {
    stroke(255, 100, 0);
    point(col, height/2);
  }
}
// Draws a single dot for the data position (x, y) using colour `c` and
// stroke weight `size`. The position is offset by half the canvas size and
// y is negated before drawing.
void advancedPoint(float x, float y, color c, int size) {
  float screenX = x + width/2;
  float screenY = -y + height/2;
  strokeWeight(size);
  stroke(c);
  point(screenX, screenY);
}
理论上,程序应该通过我的数据拟合线性回归。
线性回归基于如下形式的直线方程:
y = w1 * x + b
下面这两个表达式
dcost_dreg += -2*(yRegression-yTarget); dcost_dtar += -2*(yRegression-yTarget)*point[0];
应该计算线方程与样本点比较的误差,但你的计算是错误的。
常数误差(b 的误差)是样本的 y 坐标与直线方程在该样本 x 坐标处算出的 y 坐标之差。
线性误差(w1 的误差)通过梯度差(即斜率之差)来计算。梯度差是高度与宽度的商(y/x),而不是乘积。
这意味着计算必须是:
dcost_dreg += (yTarget-yRegression);
dcost_dtar += (yTarget-yRegression)/point[0];
表达式
w1 += learningRate * (dcost_dtar/allData.length); b += learningRate * (dcost_dreg/allData.length);
计算样本的平均误差,并根据学习率对线性方程进行修正。
按如下方式修改函数 draw 即可解决问题:
// Processing per-frame hook: clears the canvas, draws the axes and the sample
// points, applies one correction step to the line y = w1 * x + b (only during
// the first 10000 calls, and only when there is data) and plots the result.
void draw() {
  background(255);
  axes();

  //Draw Points
  for (float[] sample : allData) {
    advancedPoint(sample[0], sample[1], color(181, 16, 32), 10);
  }

  // Correction step. An empty data set is skipped: averaging over zero
  // samples would compute 0/0 and leave w1 and b stuck at NaN.
  // NOTE(review): the least-squares gradient for the slope is the residual
  // multiplied by x; the quotient used here follows the accompanying
  // explanation and may settle on a different line — verify.
  if (i < 10000 && allData.length > 0) {
    i += 1;
    float dcost_dreg = 0;  // summed offset error, drives b
    float dcost_dtar = 0;  // summed slope error, drives w1
    for (float[] point : allData) {
      float yTarget = point[1];
      float yRegression = w1*point[0] + b;
      dcost_dreg += (yTarget-yRegression);
      // A sample at x == 0 would divide by zero and turn w1 into
      // Infinity/NaN for good, so it is left out of the slope error.
      if (point[0] != 0) {
        dcost_dtar += (yTarget-yRegression)/point[0];
      }
    }
    // Apply the mean errors, scaled by the learning rate.
    w1 += learningRate * (dcost_dtar/allData.length);
    b += learningRate * (dcost_dreg/allData.length);
  }

  //Draw Regression
  linearPoints(w1, b);
}
顺便一提,建议使用 line() 来绘制坐标轴和当前的直线:
// Draws the line y = w1 * x + b as one segment between x = -width and
// x = width, offset by half the canvas size with y negated.
void linearPoints (float w1, float b) {
  float leftX = -width;
  float rightX = width;
  float leftY = leftX * w1 + b;
  float rightY = rightX * w1 + b;

  strokeWeight(3);
  stroke(100, 100, 255);
  line(leftX + width/2, -leftY + height/2,
       rightX + width/2, -rightY + height/2);
}
// Draws the two coordinate axes as full-length lines crossing at the
// canvas centre.
void axes() {
  int midX = width/2;
  int midY = height/2;

  strokeWeight(1);
  stroke(255, 100, 0);
  line(0, midY, width, midY);   // horizontal axis
  line(midX, 0, midX, height);  // vertical axis
}