如何使用 gsl 计算多项式回归数据点?

How can I use gsl to calculate polynomial regression data points?

(免责声明:我的数学很糟糕,我来自 JavaScript,所以对于任何不准确的地方,我深表歉意,并会尽力纠正。)

Rosetta Code 上的 example 展示了如何使用 gsl 计算系数。这是代码:

polifitgsl.h:

#ifndef _POLIFITGSL_H
#define _POLIFITGSL_H
#include <gsl/gsl_multifit.h>
#include <stdbool.h>
#include <math.h>
bool polynomialfit(int obs, int degree, 
           double *dx, double *dy, double *store); /* n, p */
#endif

polifitgsl.cpp:

#include "polifitgsl.h"

bool polynomialfit(int obs, int degree, 
           double *dx, double *dy, double *store) /* n, p */
{
  gsl_multifit_linear_workspace *ws;
  gsl_matrix *cov, *X;
  gsl_vector *y, *c;
  double chisq;

  int i, j;

  X = gsl_matrix_alloc(obs, degree);
  y = gsl_vector_alloc(obs);
  c = gsl_vector_alloc(degree);
  cov = gsl_matrix_alloc(degree, degree);

  for(i=0; i < obs; i++) {
    for(j=0; j < degree; j++) {
      gsl_matrix_set(X, i, j, pow(dx[i], j));
    }
    gsl_vector_set(y, i, dy[i]);
  }

  ws = gsl_multifit_linear_alloc(obs, degree);
  gsl_multifit_linear(X, y, c, cov, &chisq, ws);

  /* store result ... */
  for(i=0; i < degree; i++)
  {
    store[i] = gsl_vector_get(c, i);
  }

  gsl_multifit_linear_free(ws);
  gsl_matrix_free(X);
  gsl_matrix_free(cov);
  gsl_vector_free(y);
  gsl_vector_free(c);
  return true; /* we do not "analyse" the result (cov matrix mainly)
          to know if the fit is "good" */
}

main.cpp(注意我已经用我自己的替换了 x 和 y 的样本编号):

#include <stdio.h>

#include "polifitgsl.h"

#define NP 11
double x[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
double y[] = {98.02, 98.01, 98.01, 98.02, 97.98, 97.97, 97.96, 97.94, 97.96, 97.96, 97.97, 97.97, 97.94, 97.94, 97.94, 97.92, 97.96, 97.9, 97.85, 97.9};

#define DEGREE 3
double coeff[DEGREE];

int main()
{
  int i;

  polynomialfit(NP, DEGREE, x, y, coeff);
  for(i=0; i < DEGREE; i++) {
    printf("%lf\n", coeff[i]);
  }
  return 0;
}

这是输出:

98.030909
-0.016182
0.000909

所以这给了我系数。但我真正想要的是实际的拟合点。在JavaScript中,我使用了regression package来计算点数:

var regression = require('regression');

var calculateRegression = function(values, degree) {
    var data = [];
    var regressionOutput;
    var valuesCount = values.length;
    var i = 0;

    // Format the data in a way the regression library expects.
    for (i = 0; i < valuesCount; i++) {
        data[i] = [i, values[i]];
    }

    // Use the library to calculate the regression.
    regressionOutput = regression('polynomial', data, degree);

    return regressionOutput;
};

var y = [98.02, 98.01, 98.01, 98.02, 97.98, 97.97, 97.96, 97.94, 97.96, 97.96, 97.97, 97.97, 97.94, 97.94, 97.94, 97.92, 97.96, 97.9, 97.85, 97.9];

console.log(calculateRegression(y, 3));

产生:

{ equation: 
   [ 98.02987916431594,
     -0.017378390369880512,
     0.0015748071645344357,
     -0.00005721503635571101 ],
  points: 
   [ [ 0, 98.02987916431594 ],
     [ 1, 98.01401836607424 ],
     [ 2, 98.00096389194348 ],
     [ 3, 97.9903724517055 ],
     [ 4, 97.98190075514219 ],
     [ 5, 97.97520551203543 ],
     [ 6, 97.96994343216707 ],
     [ 7, 97.96577122531896 ],
     [ 8, 97.96234560127297 ],
     [ 9, 97.959323269811 ],
     [ 10, 97.95636094071487 ],
     [ 11, 97.95311532376647 ],
     [ 12, 97.94924312874768 ],
     [ 13, 97.94440106544033 ],
     [ 14, 97.93824584362629 ],
     [ 15, 97.93043417308745 ],
     [ 16, 97.92062276360569 ],
     [ 17, 97.90846832496283 ],
     [ 18, 97.89362756694074 ],
     [ 19, 97.87575719932133 ] ],
  string: 'y = 0x^3 + 0x^2 + -0.02x + 98.03' }

(请注意 JavaScript 中存在浮点数问题,因此数字并不完全准确。)

points 这是我想要使用 gsl 生成的内容。我有办法做到这一点吗?

查德,如果我明白你的需要,你只需要使用你在 polynomialfit 函数中找到的系数,根据多项式方程计算值。计算系数后,您可以使用以下等式找到任何 x(对于 DEGREE = 3)的值 y

y = x^2*(coeff2) + x*(coeff1) + coeff0

或 C 语法

y = x*x*coeff[2] + x*coeff[1] + coeff[0];

您可以按如下方式修改 main.cpp(您应该将 main.cpp 重命名为 main.c 并将 polyfitgsl.cpp 重命名为 polyfitgsl.c -- 因为它们都是C 源文件,不是 C++)

#include <stdio.h>

#include "polifitgsl.h"

#define NP 20
double x[] = {  0,  1,  2,  3,  4,  5,  6,  7,  8, 9,
               10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
double y[] = { 98.02, 98.01, 98.01, 98.02, 97.98,
               97.97, 97.96, 97.94, 97.96, 97.96,
               97.97, 97.97, 97.94, 97.94, 97.94,
               97.92, 97.96, 97.9,  97.85, 97.9 };

#define DEGREE 3
double coeff[DEGREE];

int main (void)
{
    int i;

    polynomialfit (NP, DEGREE, x, y, coeff);

    printf ("\n polynomial coefficients:\n\n");
    for (i = 0; i < DEGREE; i++) {
        printf ("  coeff[%d] : %11.7lf\n", i, coeff[i]);
    }
    putchar ('\n');

    printf (" computed values:\n\n   x    y\n");
    for (i = 0; i < (int)(sizeof x/sizeof *x); i++)
        printf ("  %2d, %11.7lf\n", i, 
                i*i*coeff[2] + i*coeff[1] + coeff[0]);
    putchar ('\n');

    return 0;
}

提供以下输出:

例子Use/Output

$ ./bin/main

 polynomial coefficients:

  coeff[0] :  98.0132468
  coeff[1] :  -0.0053003
  coeff[2] :  -0.0000558

 computed values:

   x    y
   0,  98.0132468
   1,  98.0078906
   2,  98.0024229
   3,  97.9968435
   4,  97.9911524
   5,  97.9853497
   6,  97.9794354
   7,  97.9734094
   8,  97.9672718
   9,  97.9610226
  10,  97.9546617
  11,  97.9481891
  12,  97.9416049
  13,  97.9349091
  14,  97.9281016
  15,  97.9211825
  16,  97.9141517
  17,  97.9070093
  18,  97.8997553
  19,  97.8923896

这似乎是你所要求的。查看它,比较您的价值观,如果您需要其他帮助,请告诉我。


进一步清理

只是为了进一步清理代码,因为不需要 xycoeff 的全局声明,并使用 enum 来定义常量 DEGREENP,更简洁的版本是:

#include <stdio.h>

#include "polifitgsl.h"

enum { DEGREE = 3, NP = 20 };

int main (void)
{
    int i;
    double x[] = {  0,  1,  2,  3,  4,  5,  6,  7,  8, 9,
                   10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
    double y[] = { 98.02, 98.01, 98.01, 98.02, 97.98,
                   97.97, 97.96, 97.94, 97.96, 97.96,
                   97.97, 97.97, 97.94, 97.94, 97.94,
                   97.92, 97.96, 97.9,  97.85, 97.9 };
    double coeff[DEGREE] = {0};


    polynomialfit (NP, DEGREE, x, y, coeff);

    printf ("\n polynomial coefficients:\n\n");
    for (i = 0; i < DEGREE; i++)
        printf ("  coeff[%d] : %11.7lf\n", i, coeff[i]);
    putchar ('\n');

    printf (" computed values:\n\n   x    y\n");
    for (i = 0; i < (int)(sizeof x/sizeof *x); i++)
        printf ("  %2d, %11.7lf\n", i, 
                i*i*coeff[2] + i*coeff[1] + coeff[0]);
    putchar ('\n');

    return 0;
}

我遇到了同样的问题并采用了上面的答案并添加了直接计算的解决方案(没有 gsl)取自 http://www.cplusplus.com/forum/general/181580/

在下面,您会找到一个独立的测试程序,其中包含基于 gsl 的直接计算解决方案。

我进行了一些分析运行,直接计算的性能令人印象深刻 在我的系统上高出 65 倍,5.22 秒对 1000 次调用相应函数的 0.08 秒.

附带说明一下,直接计算使用 Cramer 规则,因此您应该小心处理病态数据。通常我会避免 Cramer's,但对 3x3 系统使用完整的线性系统求解器对我来说似乎有点过分了。

/*
* =====================================================================================
*
*       Filename:  polyfit.cpp
*
*    Description:  Least squares fit of second order polynomials 
*                  Test program using gsl and direct calculation
*        Version:  1.0
*        Created:  2017-07-17 09:32:55
*       Compiler:  gcc
*
*         Author:  Bernhard Brunner, brb_blog@epr.ch
*     References:  This code was merged, adapted and optimized from these two sources:
*                  http://www.cplusplus.com/forum/general/181580/
*                  
*                  http://brb.epr.ch/blog/blog:least_squares_regression_of_parabola
*          Build:  compile and link using 
*                  g++    -c -o polifitgsl.o polifitgsl.cpp
*                  gcc  polifitgsl.o -lgsl -lm -lblas -o polifitgsl
* 
*      Profiling:
*                  valgrind --tool=callgrind ./polifitgsl
*                  kcachegrind
*                  
*                  polynomialfit        takes 5.22s for 1000 calls
*                  findQuadCoefficients takes 0.08s for 1000 calls
*                   65x faster
* =====================================================================================
*/

#include <stdio.h>
#include <gsl/gsl_multifit.h>
#include <stdbool.h>
#include <math.h>

bool polynomialfit(int obs, int degree, 
        double *dx, double *dy, double *store); /* n, p */
double x[] = {  0,  1,  2,  3,  4,  5,  6,  7,  8, 9,
            10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
double y[] = { 98.02, 98.01, 98.01, 98.02, 97.98,
            97.97, 97.96, 97.94, 97.96, 97.96,
            97.97, 97.97, 97.94, 97.94, 97.94,
            97.92, 97.96, 97.9,  97.85, 97.9 };

#define NP (sizeof(x)/sizeof(double)) // 20

#define DEGREE 3
double coeff[DEGREE];

bool findQuadCoefficients(double timeArray[], double valueArray[], double *coef, double &critPoint, int PointsNum){
    const double S00=PointsNum;//points number
    double S40=0, S10=0, S20=0, S30=0, S01=0, S11=0, S21 = 0;
//    const double MINvalue = valueArray[0];
//    const double MINtime = timeArray[0];
    for (int i=0; i<PointsNum; i++ ){
        double value = valueArray[i]; //  - MINvalue); //normalizing
//      cout << "i=" << i << " index=" << index << " value=" << value << endl;

        int index = timeArray[i]; //  - MINtime;
        int index2 = index * index;
        int index3 = index2 * index;
        int index4 = index3 * index;

        S40+= index4;
        S30+= index3; 
        S20+= index2;
        S10+= index;

        S01 += value;
        S11 += value*index;
        S21 += value*index2;
    }

    double S20squared = S20*S20;

    //minors M_ij=M_ji
    double M11 = S20*S00 - S10*S10;
    double M21 = S30*S00 - S20*S10;
    double M22 = S40*S00 - S20squared;
    double M31 = S30*S10 - S20squared;

    double M32 = S40*S10 - S20*S30;
//  double M33 = S40*S20 - pow(S30,2);

    double discriminant = S40*M11 - S30*M21 + S20*M31;
//    printf("discriminant :%lf\n", discriminant);
    if (abs(discriminant) < .00000000001) return  false;

    double Da = S21*M11
               -S11*M21
               +S01*M31;
    coef[2] = Da/discriminant;
//  cout << "discriminant=" << discriminant;
//  cout << " Da=" << Da;

    double Db = -S21*M21
                +S11*M22
                -S01*M32;
    coef[1] = Db/discriminant;
//  cout << " Db=" << Db << endl;

    double Dc =   S40*(S20*S01 - S10*S11) 
                - S30*(S30*S01 - S10*S21) 
                + S20*(S30*S11 - S20*S21);
    coef[0] = Dc/discriminant;
//    printf("c=%lf\n", c);

    critPoint = -Db/(2*Da); // + MINtime; //-b/(2*a)= -Db/discriminant / (2*(Da/discriminant)) = -Db/(2*Da);

    return true;
}

bool polynomialfit(int obs, int degree, 
        double *dx, double *dy, double *store) /* n, p */
{
gsl_multifit_linear_workspace *ws;
gsl_matrix *cov, *X;
gsl_vector *y, *c;
double chisq;

int i, j;

X = gsl_matrix_alloc(obs, degree);
y = gsl_vector_alloc(obs);
c = gsl_vector_alloc(degree);
cov = gsl_matrix_alloc(degree, degree);

for(i=0; i < obs; i++) {
    for(j=0; j < degree; j++) {
        gsl_matrix_set(X, i, j, pow(dx[i], j));
    }
    gsl_vector_set(y, i, dy[i]);
}

ws = gsl_multifit_linear_alloc(obs, degree);
gsl_multifit_linear(X, y, c, cov, &chisq, ws);

/* store result ... */
for(i=0; i < degree; i++) {
    store[i] = gsl_vector_get(c, i);
}

gsl_multifit_linear_free(ws);
gsl_matrix_free(X);
gsl_matrix_free(cov);
gsl_vector_free(y);
gsl_vector_free(c);
return true; /* we do not "analyse" the result (cov matrix mainly)
        to know if the fit is "good" */
}

void testcoeff(double *coeff)
{
    printf ("\n polynomial coefficients\n");
    for (int i = 0; i < DEGREE; i++) {
        printf ("  coeff[%d] : %11.7lf\n", i, coeff[i]);
    }
    putchar ('\n');

    printf (" computed values:\n\n   x, yi, yip\n");
    for (unsigned i = 0; i < NP; i++) {
        printf ("%2u,%.7lf,%.7lf\n", i, 
                y[i], 
                i*i*coeff[2] + i*coeff[1] + coeff[0]);
    }
    putchar ('\n');
}

int main (void)
{
    #define ITER 1000
    for (int i=0; i< ITER; i++) {
        polynomialfit (NP, DEGREE, x, y, coeff);
    }
    testcoeff(coeff);

    double sx; 
    for (int i=0; i< ITER; i++) {
        findQuadCoefficients(x, y, coeff, sx, NP);
    }
    printf("critical point %lf\n", sx);
    testcoeff(coeff);
    return 0;
}

参考文献: