RK 积分器中的长双精度误差饱和
Long double precision error saturation in RK integrator
我正在尝试编写一个积分器,它使用长双精度来获得非常高的精度。我知道我的系统架构长期支持双精度,但出于某种原因,我的积分器精度最高可达 16 位有效数字。这里有一些代码可以重现我所看到的。此示例的积分器改编自 this source。在此测试用例中,我使用它来计算欧拉数(对于代码块的长度,我深表歉意,但我无法以任何其他方式重新创建行为):
// ld_int.c: a simple integrator using long doubles
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#define max(x,y) ( (x) < (y) ? (y) : (x) )
#define min(x,y) ( (x) < (y) ? (x) : (y) )
#define ATTEMPTS 50
static long double Runge_Kutta(long double (*f)(long double),
long double *y,
long double x,
long double h);
int Embedded_Verner_3_4(long double (*f)(long double),
long double y[],
long double x,
long double h,
long double xmax,
long double *h_next,
long double tolerance);
long double dydx(long double y);
// the only command-line argument to this is the desired precision.
// that is, if you run this with the argument -14, it will give you
// an answer which is correct to 10^-14
////////////////////////////////////////////////////
int main(int argc, char *argv[]){
long double y0[2] = {1.L, 2.L};
long double x0 = 0.L;
long double h = 1e-4L;
long double xmax = 1L;
long double tol = powl(10, atoi(argv[1]));
Embedded_Verner_3_4(dydx, y0, x0, h, xmax, &h_next, tol)
// compare a long-double value for E
printf("expl(1) = %0.22LF\n", expl(1));
// with the calculated value
printf("eval = %0.22LF\n", y0[1]);
return(0);
}
// this test function just returns the input so it integrates e^x
long double dydx(long double y){
return(y);
}
////////////////////////////////////////////////////////////
// Arguments: //
// double *f Pointer to the function which returns the slope at (x,y) of //
// integral curve of the differential equation y' = f(x,y) //
// which passes through the point (x0,y0) corresponding to the //
// initial condition y(x0) = y0. //
// double y[] On input y[0] is the initial value of y at x, on output //
// y[1] is the solution at xmax. //
// double x The initial value of x. //
// double h Initial step size. //
// double xmax The endpoint of x. //
// double *h_next A pointer to the estimated step size for successive //
// calls to Runge_Kutta_Fehlberg. //
// double tolerance The tolerance of y(xmax), i.e. a solution is sought //
// so that the relative error < tolerance. //
// //
// Return Values: //
// 0 The solution of y' = f(x,y) from x to xmax is stored y[1] and //
// h_next has the value to the next size to try. //
// -1 The solution of y' = f(x,y) from x to xmax failed. //
// -2 Failed because either xmax < x or the step size h <= 0. //
////////////////////////////////////////////////////////////////////
int Embedded_Verner_3_4(long double (*f)(long double),
long double y[],
long double x,
long double h,
long double xmax,
long double *h_next,
long double tolerance){
long double scale;
long double temp_y[2];
long double err;
long double yy;
int i;
int last_interval = 0;
long double MIN_SCALE_FACTOR = 0.125L;
long double MAX_SCALE_FACTOR = 4.0L;
// Verify that the step size is positive and that the upper endpoint //
// of integration is greater than the initial enpoint. //
if (xmax < x || h <= 0.0) return -2;
// If the upper endpoint of the independent variable agrees with the //
// initial value of the independent variable. Set the value of the //
// dependent variable and return success. //
*h_next = h;
y[1] = y[0];
if (xmax == x) return 0;
// Insure that the step size h is not larger than the length of the //
// integration interval. //
h = min(h, xmax - x);
// Redefine the error tolerance to an error tolerance per unit //
// length of the integration interval. //
tolerance /= (xmax - x);
// Integrate the diff eq y'=f(x,y) from x=x to x=xmax trying to //
// maintain an error less than tolerance * (xmax-x) using an //
// initial step size of h and initial value: y = y[0] //
temp_y[0] = y[0];
while (x < xmax){
scale = 1.0L;
for(i = 0; i < ATTEMPTS; i++){
err = fabsl(Runge_Kutta(f, temp_y, x, h));
if(err == 0.0L){
scale = MAX_SCALE_FACTOR;
break;
}
yy = (temp_y[0] == 0.0L)?
tolerance:
fabsl(temp_y[0]);
scale = 0.8L * powl(tolerance * yy / err, 0.2L);
scale = min(max(scale,MIN_SCALE_FACTOR), MAX_SCALE_FACTOR);
if(err < (tolerance * yy))
break;
h *= scale;
if(x + h > xmax){
h = xmax - x;
}
else if(x + h + 0.5L * h > xmax){
h = 0.5L * h;
}
}
if(i >= ATTEMPTS){
printf("\nout of attempts. stats:\nerr = %LF, h = %LF, scale = %LF, x = %LF\n\n", log10l(err), log10l(h), scale, x);
*h_next = h * scale;
return -1;
}
temp_y[0] = temp_y[1];
x += h;
h *= scale;
*h_next = h;
if(last_interval)
break;
if(x + h > xmax){
last_interval = 1;
h = xmax - x;
}
else if(x + h + 0.5 * h > xmax)
h = 0.5 * h;
}
y[1] = temp_y[1];
return 0;
}
static const long double a2 = 2.0 / 7.0;
static const long double a3 = 7.0 / 15.0;
static const long double a4 = 35.0 / 38.0;
static const long double b31 = 77.0 / 900.0;
static const long double b32 = 343.0 / 900.0;
static const long double b41 = 805.0 / 1444.0;
static const long double b42 = -77175.0 / 54872.0;
static const long double b43 = 97125.0 / 54872.0;
static const long double b51 = 79.0 / 490.0;
static const long double b53 = 2175.0 / 3626.0;
static const long double b54 = 2166.0 / 9065.0;
static const long double c1 = 229.0 / 1470.0;
static const long double c3 = 1125.0 / 1813.0;
static const long double c4 = 13718.0 / 81585.0;
static const long double c5 = 1.0 / 18.0;
static const long double d1 = -888.0 / 163170.0;
static const long double d3 = 3375.0 / 163170.0;
static const long double d4 = -11552.0 / 163170.0;
static const long double d5 = 9065.0 / 163170.0;
//////////////////////////////////////////////////////////
// Arguments: //
// double *f Pointer to the function which returns the slope at (y) of //
// integral curve of the differential equation y' = f(y) //
// which passes through the point (x0,y[0]). //
// double y[] On input y[0] is the initial value of y at x, on output //
// y[1] is the solution at x + h. //
// double x Initial value of x. //
// double h Step size //
// //
// Return Values: //
// This routine returns the err / h. The solution of y(x) at x + h is //
// returned in y[1].
///////////////////////////////////////
static long double Runge_Kutta(long double (*f)(long double),
long double *y,
long double x0,
long double h){
long double k1, k2, k3, k4, k5;
long double h2 = a2 * h;
k1 = (*f)(*y);
k2 = (*f)(*y + h2 * k1);
k3 = (*f)(*y + h * ( b31*k1 + b32*k2) );
k4 = (*f)(*y + h * ( b41*k1 + b42*k2 + b43*k3) );
k5 = (*f)(*y + h * ( b51*k1 + b53*k3 + b54*k4) );
*(y+1) = *y + h * (c1*k1 + c3*k3 + c4*k4 + c5*k5);
return d1*k1 + d3*k3 + d4*k4 + d5*k5;
}
我用
编译这段代码
$ gcc -c ld_int.c
$ gcc ld_int.o -lm -o ldint
和运行它与
$ ./ldint -16
将return
expl(1) = 2.7182818284590452354282
eval = 2.7182818284590453085034
其中 E 的两个值一致到小数点后第 16 位。
但是,当我尝试去更高阶时,例如 ./ldt -17
,它突然无法收敛。如果您让积分器打印每一步的错误,您会看到它徘徊在 10^-16.8 左右,但在错误低于 10^-17 之前时间步将变为 0。在我看来,尽管我已尽最大努力使代码中的所有内容都成为 long double,但它仍然在双精度下饱和。
再次,对于上面块中的代码量,我深表歉意,但它确实无法重现我能想到的任何其他方式。我写了一个测试函数来通过它的泰勒展开来计算 E,并且很高兴将它计算为 19 位有效数字和长双精度数。想不通集成案例有什么特别之处
but for some reason, the precision of my integrator maxes out at 16 significant digits.
至少,使用 long double
初始化的更正确值 long double
商而不是 double
商。
// long double a2 = 2.0 / 7.0;
long double a2 = 2.0L / 7.0L;
其他 19 个初始化也是如此。
我正在尝试编写一个积分器,它使用长双精度来获得非常高的精度。我知道我的系统架构长期支持双精度,但出于某种原因,我的积分器精度最高可达 16 位有效数字。这里有一些代码可以重现我所看到的。此示例的积分器改编自 this source。在此测试用例中,我使用它来计算欧拉数(对于代码块的长度,我深表歉意,但我无法以任何其他方式重新创建行为):
// ld_int.c: a simple integrator using long doubles
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#define max(x,y) ( (x) < (y) ? (y) : (x) )
#define min(x,y) ( (x) < (y) ? (x) : (y) )
#define ATTEMPTS 50
static long double Runge_Kutta(long double (*f)(long double),
long double *y,
long double x,
long double h);
int Embedded_Verner_3_4(long double (*f)(long double),
long double y[],
long double x,
long double h,
long double xmax,
long double *h_next,
long double tolerance);
long double dydx(long double y);
// the only command-line argument to this is the desired precision.
// that is, if you run this with the argument -14, it will give you
// an answer which is correct to 10^-14
////////////////////////////////////////////////////
int main(int argc, char *argv[]){
long double y0[2] = {1.L, 2.L};
long double x0 = 0.L;
long double h = 1e-4L;
long double xmax = 1L;
long double tol = powl(10, atoi(argv[1]));
Embedded_Verner_3_4(dydx, y0, x0, h, xmax, &h_next, tol)
// compare a long-double value for E
printf("expl(1) = %0.22LF\n", expl(1));
// with the calculated value
printf("eval = %0.22LF\n", y0[1]);
return(0);
}
// this test function just returns the input so it integrates e^x
long double dydx(long double y){
return(y);
}
////////////////////////////////////////////////////////////
// Arguments: //
// double *f Pointer to the function which returns the slope at (x,y) of //
// integral curve of the differential equation y' = f(x,y) //
// which passes through the point (x0,y0) corresponding to the //
// initial condition y(x0) = y0. //
// double y[] On input y[0] is the initial value of y at x, on output //
// y[1] is the solution at xmax. //
// double x The initial value of x. //
// double h Initial step size. //
// double xmax The endpoint of x. //
// double *h_next A pointer to the estimated step size for successive //
// calls to Runge_Kutta_Fehlberg. //
// double tolerance The tolerance of y(xmax), i.e. a solution is sought //
// so that the relative error < tolerance. //
// //
// Return Values: //
// 0 The solution of y' = f(x,y) from x to xmax is stored y[1] and //
// h_next has the value to the next size to try. //
// -1 The solution of y' = f(x,y) from x to xmax failed. //
// -2 Failed because either xmax < x or the step size h <= 0. //
////////////////////////////////////////////////////////////////////
int Embedded_Verner_3_4(long double (*f)(long double),
long double y[],
long double x,
long double h,
long double xmax,
long double *h_next,
long double tolerance){
long double scale;
long double temp_y[2];
long double err;
long double yy;
int i;
int last_interval = 0;
long double MIN_SCALE_FACTOR = 0.125L;
long double MAX_SCALE_FACTOR = 4.0L;
// Verify that the step size is positive and that the upper endpoint //
// of integration is greater than the initial enpoint. //
if (xmax < x || h <= 0.0) return -2;
// If the upper endpoint of the independent variable agrees with the //
// initial value of the independent variable. Set the value of the //
// dependent variable and return success. //
*h_next = h;
y[1] = y[0];
if (xmax == x) return 0;
// Insure that the step size h is not larger than the length of the //
// integration interval. //
h = min(h, xmax - x);
// Redefine the error tolerance to an error tolerance per unit //
// length of the integration interval. //
tolerance /= (xmax - x);
// Integrate the diff eq y'=f(x,y) from x=x to x=xmax trying to //
// maintain an error less than tolerance * (xmax-x) using an //
// initial step size of h and initial value: y = y[0] //
temp_y[0] = y[0];
while (x < xmax){
scale = 1.0L;
for(i = 0; i < ATTEMPTS; i++){
err = fabsl(Runge_Kutta(f, temp_y, x, h));
if(err == 0.0L){
scale = MAX_SCALE_FACTOR;
break;
}
yy = (temp_y[0] == 0.0L)?
tolerance:
fabsl(temp_y[0]);
scale = 0.8L * powl(tolerance * yy / err, 0.2L);
scale = min(max(scale,MIN_SCALE_FACTOR), MAX_SCALE_FACTOR);
if(err < (tolerance * yy))
break;
h *= scale;
if(x + h > xmax){
h = xmax - x;
}
else if(x + h + 0.5L * h > xmax){
h = 0.5L * h;
}
}
if(i >= ATTEMPTS){
printf("\nout of attempts. stats:\nerr = %LF, h = %LF, scale = %LF, x = %LF\n\n", log10l(err), log10l(h), scale, x);
*h_next = h * scale;
return -1;
}
temp_y[0] = temp_y[1];
x += h;
h *= scale;
*h_next = h;
if(last_interval)
break;
if(x + h > xmax){
last_interval = 1;
h = xmax - x;
}
else if(x + h + 0.5 * h > xmax)
h = 0.5 * h;
}
y[1] = temp_y[1];
return 0;
}
static const long double a2 = 2.0 / 7.0;
static const long double a3 = 7.0 / 15.0;
static const long double a4 = 35.0 / 38.0;
static const long double b31 = 77.0 / 900.0;
static const long double b32 = 343.0 / 900.0;
static const long double b41 = 805.0 / 1444.0;
static const long double b42 = -77175.0 / 54872.0;
static const long double b43 = 97125.0 / 54872.0;
static const long double b51 = 79.0 / 490.0;
static const long double b53 = 2175.0 / 3626.0;
static const long double b54 = 2166.0 / 9065.0;
static const long double c1 = 229.0 / 1470.0;
static const long double c3 = 1125.0 / 1813.0;
static const long double c4 = 13718.0 / 81585.0;
static const long double c5 = 1.0 / 18.0;
static const long double d1 = -888.0 / 163170.0;
static const long double d3 = 3375.0 / 163170.0;
static const long double d4 = -11552.0 / 163170.0;
static const long double d5 = 9065.0 / 163170.0;
//////////////////////////////////////////////////////////
// Arguments: //
// double *f Pointer to the function which returns the slope at (y) of //
// integral curve of the differential equation y' = f(y) //
// which passes through the point (x0,y[0]). //
// double y[] On input y[0] is the initial value of y at x, on output //
// y[1] is the solution at x + h. //
// double x Initial value of x. //
// double h Step size //
// //
// Return Values: //
// This routine returns the err / h. The solution of y(x) at x + h is //
// returned in y[1].
///////////////////////////////////////
static long double Runge_Kutta(long double (*f)(long double),
long double *y,
long double x0,
long double h){
long double k1, k2, k3, k4, k5;
long double h2 = a2 * h;
k1 = (*f)(*y);
k2 = (*f)(*y + h2 * k1);
k3 = (*f)(*y + h * ( b31*k1 + b32*k2) );
k4 = (*f)(*y + h * ( b41*k1 + b42*k2 + b43*k3) );
k5 = (*f)(*y + h * ( b51*k1 + b53*k3 + b54*k4) );
*(y+1) = *y + h * (c1*k1 + c3*k3 + c4*k4 + c5*k5);
return d1*k1 + d3*k3 + d4*k4 + d5*k5;
}
我用
编译这段代码$ gcc -c ld_int.c
$ gcc ld_int.o -lm -o ldint
和运行它与
$ ./ldint -16
将return
expl(1) = 2.7182818284590452354282
eval = 2.7182818284590453085034
其中 E 的两个值一致到小数点后第 16 位。
但是,当我尝试去更高阶时,例如 ./ldt -17
,它突然无法收敛。如果您让积分器打印每一步的错误,您会看到它徘徊在 10^-16.8 左右,但在错误低于 10^-17 之前时间步将变为 0。在我看来,尽管我已尽最大努力使代码中的所有内容都成为 long double,但它仍然在双精度下饱和。
再次,对于上面块中的代码量,我深表歉意,但它确实无法重现我能想到的任何其他方式。我写了一个测试函数来通过它的泰勒展开来计算 E,并且很高兴将它计算为 19 位有效数字和长双精度数。想不通集成案例有什么特别之处
but for some reason, the precision of my integrator maxes out at 16 significant digits.
至少,使用 long double
初始化的更正确值 long double
商而不是 double
商。
// long double a2 = 2.0 / 7.0;
long double a2 = 2.0L / 7.0L;
其他 19 个初始化也是如此。