如何对浮点平方根进行塔克曼舍入
How to Perform Tuckerman Rounding for Floating Point Square Root
我正在尝试执行 Tuckerman 舍入测试以确定正确舍入到最接近的结果。
我用 C++ 创建了一个程序来将两个解与一个数的平方根进行比较,并对它们执行塔克曼检验。但是,C++ 数学库解决方案未能通过 tuckerman 测试,所以我想知道可能是什么问题?
这是我的输出:
Square root program started
Input value is 62a83003
===Tuckerman Test with MATLAB result===
Square root result from MATLAB = 5112b968
g*(g-ulp) = 62a83001
b = 62a83003
g*(g+ulp) = 62a83003
=====>Passes Tuckerman test
===Tuckerman Test with correct result===
Correct square root result = 5112b969
g*(g-ulp) = 62a83003
b = 62a83003
g*(g+ulp) = 62a83005
=====>Fails Tuckerman test
这是我的代码 (C++):
#include <iostream>
#include <cmath>
#include <fstream>
using namespace std;
union newfloat{
float f;
unsigned int i;
};
int main ()
{
// Declare new floating point numbers
newfloat input;
newfloat result, resultm1, resultp1;
newfloat correct_result, correct_resultm1, correct_resultp1;
newfloat resultm1_times_result, resultp1_times_result;
newfloat correct_resultm1_times_result, correct_resultp1_times_result;
// Print message at start of program
cout << "Square root program started" << endl;
input.i = 0x62A83003; // Input we are trying to find the square root of
cout << "Input value is " << hex << input.i << "\n" << endl; // Print input value
result.i = 0x5112B968; // Result from MATLAB
resultm1.i = result.i - 1; // value minus 1 ulp
resultp1.i = result.i + 1; // value plus 1 ulp
correct_result.f = sqrt(input.f); // Compute correct square root
correct_resultm1.i = correct_result.i - 1; // correct value minus 1 ulp
correct_resultp1.i = correct_result.i + 1; // correct value plus 1 ulp
resultm1_times_result.f = result.f * resultm1.f; // Compute g(g-ulp) for matlab result
resultp1_times_result.f = result.f * resultp1.f; // Compute g(g+ulp) for matlab result
correct_resultm1_times_result.f = correct_result.f * correct_resultm1.f; // Compute g*(g-ulp) for correct result
correct_resultp1_times_result.f = correct_result.f * correct_resultp1.f; // Compute g*(g+ulp) for correct result
// Print output from MATLAB algorithm and perform tuckerman test
cout << "===Tuckerman Test with MATLAB result===" << endl;
cout << "Square root result from MATLAB = " << result.i << endl;
cout << "g*(g-ulp) = " << hex << resultm1_times_result.i << endl;
cout << "b = " << hex << input.i << endl;
cout << "g*(g+ulp) = " << hex << resultp1_times_result.i << endl;
if ((resultm1_times_result.f < input.f) && (input.f <= resultp1_times_result.f))
cout << "=====>Passes Tuckerman test" << endl;
else
cout << "=====>Fails Tuckerman test" << endl;
cout << "\n" << endl;
// Print output from C++ sqrt math library and perform tuckerman test
cout << "===Tuckerman Test with correct result===" << endl;
cout << "Correct square root result = " << hex << correct_result.i << endl;
cout << "g*(g-ulp) = " << hex << correct_resultm1_times_result.i << endl;
cout << "b = " << hex << input.i << endl;
cout << "g*(g+ulp) = " << hex << correct_resultp1_times_result.i << endl;
if ((correct_resultm1_times_result.f < input.f) && (input.f <= correct_resultp1_times_result.f))
cout << "=====>Passes Tuckerman test" << endl;
else
cout << "=====>Fails Tuckerman test" << endl;
return 0;
}
引入塔克曼平方根舍入的原始出版物是:
Ramesh C. Agarwal、James W. Cooley、Fred G. Gustavson、James B. Shearer、Gordon Slishman、Bryant Tuckerman、
"New scalar and vector elementary functions for the IBM System/370",IBM J. Res。发展,卷。 30,第 2 期,1986 年 3 月,第 126-144 页。
本文特别指出,用于计算乘积g*(g-ulp)
和g*(g+ulp)
的乘法是截断乘法,而不是舍入乘法:
”然而,这些不等式可以证明等价于
y- *
y < x <= y *
y+ ,
其中 *
表示 System/360/370 乘法(截断结果),以便轻松进行测试
无需额外的精度。 (注意不对称性:一个 <,一个 <=。)如果左不等式失败,则 y 太大;如果
右不等式失败,y 太小了。"
以下 C99 代码显示了如何成功利用 Tuckerman 舍入在单精度平方根函数中提供正确舍入的结果。
#include <stdio.h>
#include <stdlib.h>
#include <fenv.h>
#include <math.h>
#pragma STDC FENV_ACCESS ON
float mul_fp32_rz (float a, float b)
{
float r;
int orig_rnd = fegetround();
fesetround (FE_TOWARDZERO);
r = a * b;
fesetround (orig_rnd);
return r;
}
float my_sqrtf (float a)
{
float b, r, v, w, p, s;
int e, t, f;
if ((a <= 0.0f) || isinff (a) || isnanf (a)) {
if (a < 0.0f) {
r = 0.0f / 0.0f;
} else {
r = a + a;
}
} else {
/* compute exponent adjustments */
b = frexpf (a, &e);
t = e - 2*512;
f = t / 2;
t = t - 2 * f;
f = f + 512;
/* map argument into the primary approximation interval [0.25,1) */
b = ldexpf (b, t);
/* initial approximation to reciprocal square root */
r = -6.10005470e+0f;
r = r * b + 2.28990124e+1f;
r = r * b - 3.48110069e+1f;
r = r * b + 2.76135244e+1f;
r = r * b - 1.24472151e+1f;
r = r * b + 3.84509158e+0f;
/* round rsqrt approximation to 11 bits */
r = rintf (r * 2048.0f);
r = r * (1.0f / 2048.0f);
/* Use A. Schoenhage's coupled iteration for the square root */
v = 0.5f * r;
w = b * r;
w = (w * -w + b) * v + w;
v = (r * -w + 1.0f) * v + v;
w = (w * -w + b) * v + w;
/* Tuckerman rounding: mul_rz (w, w-ulp) < b <= mul_rz (w, w+ulp) */
p = nextafterf (w, 0.0f);
s = nextafterf (w, 2.0f);
if (b <= mul_fp32_rz (w, p)) {
w = p;
} else if (b > mul_fp32_rz (w, s)) {
w = s;
}
/* map back from primary approximation interval by jamming exponent */
r = ldexpf (w, f);
}
return r;
}
int main (void)
{
volatile union {
float f;
unsigned int i;
} arg, res, ref;
arg.i = 0;
do {
res.f = my_sqrtf (arg.f);
ref.f = sqrtf (arg.f);
if (res.i != ref.i) {
printf ("!!!! error @ arg=%08x: res=%08x ref=%08x\n",
arg.i, res.i, ref.i);
break;
}
arg.i++;
} while (arg.i);
return EXIT_SUCCESS;
}
我正在尝试执行 Tuckerman 舍入测试以确定正确舍入到最接近的结果。
我用 C++ 创建了一个程序来将两个解与一个数的平方根进行比较,并对它们执行塔克曼检验。但是,C++ 数学库解决方案未能通过 tuckerman 测试,所以我想知道可能是什么问题?
这是我的输出:
Square root program started
Input value is 62a83003
===Tuckerman Test with MATLAB result===
Square root result from MATLAB = 5112b968
g*(g-ulp) = 62a83001
b = 62a83003
g*(g+ulp) = 62a83003
=====>Passes Tuckerman test
===Tuckerman Test with correct result===
Correct square root result = 5112b969
g*(g-ulp) = 62a83003
b = 62a83003
g*(g+ulp) = 62a83005
=====>Fails Tuckerman test
这是我的代码 (C++):
#include <iostream>
#include <cmath>
#include <fstream>
using namespace std;
union newfloat{
float f;
unsigned int i;
};
int main ()
{
// Declare new floating point numbers
newfloat input;
newfloat result, resultm1, resultp1;
newfloat correct_result, correct_resultm1, correct_resultp1;
newfloat resultm1_times_result, resultp1_times_result;
newfloat correct_resultm1_times_result, correct_resultp1_times_result;
// Print message at start of program
cout << "Square root program started" << endl;
input.i = 0x62A83003; // Input we are trying to find the square root of
cout << "Input value is " << hex << input.i << "\n" << endl; // Print input value
result.i = 0x5112B968; // Result from MATLAB
resultm1.i = result.i - 1; // value minus 1 ulp
resultp1.i = result.i + 1; // value plus 1 ulp
correct_result.f = sqrt(input.f); // Compute correct square root
correct_resultm1.i = correct_result.i - 1; // correct value minus 1 ulp
correct_resultp1.i = correct_result.i + 1; // correct value plus 1 ulp
resultm1_times_result.f = result.f * resultm1.f; // Compute g(g-ulp) for matlab result
resultp1_times_result.f = result.f * resultp1.f; // Compute g(g+ulp) for matlab result
correct_resultm1_times_result.f = correct_result.f * correct_resultm1.f; // Compute g*(g-ulp) for correct result
correct_resultp1_times_result.f = correct_result.f * correct_resultp1.f; // Compute g*(g+ulp) for correct result
// Print output from MATLAB algorithm and perform tuckerman test
cout << "===Tuckerman Test with MATLAB result===" << endl;
cout << "Square root result from MATLAB = " << result.i << endl;
cout << "g*(g-ulp) = " << hex << resultm1_times_result.i << endl;
cout << "b = " << hex << input.i << endl;
cout << "g*(g+ulp) = " << hex << resultp1_times_result.i << endl;
if ((resultm1_times_result.f < input.f) && (input.f <= resultp1_times_result.f))
cout << "=====>Passes Tuckerman test" << endl;
else
cout << "=====>Fails Tuckerman test" << endl;
cout << "\n" << endl;
// Print output from C++ sqrt math library and perform tuckerman test
cout << "===Tuckerman Test with correct result===" << endl;
cout << "Correct square root result = " << hex << correct_result.i << endl;
cout << "g*(g-ulp) = " << hex << correct_resultm1_times_result.i << endl;
cout << "b = " << hex << input.i << endl;
cout << "g*(g+ulp) = " << hex << correct_resultp1_times_result.i << endl;
if ((correct_resultm1_times_result.f < input.f) && (input.f <= correct_resultp1_times_result.f))
cout << "=====>Passes Tuckerman test" << endl;
else
cout << "=====>Fails Tuckerman test" << endl;
return 0;
}
引入塔克曼平方根舍入的原始出版物是:
Ramesh C. Agarwal、James W. Cooley、Fred G. Gustavson、James B. Shearer、Gordon Slishman、Bryant Tuckerman、 "New scalar and vector elementary functions for the IBM System/370",IBM J. Res。发展,卷。 30,第 2 期,1986 年 3 月,第 126-144 页。
本文特别指出,用于计算乘积g*(g-ulp)
和g*(g+ulp)
的乘法是截断乘法,而不是舍入乘法:
”然而,这些不等式可以证明等价于
y- *
y < x <= y *
y+ ,
其中 *
表示 System/360/370 乘法(截断结果),以便轻松进行测试
无需额外的精度。 (注意不对称性:一个 <,一个 <=。)如果左不等式失败,则 y 太大;如果
右不等式失败,y 太小了。"
以下 C99 代码显示了如何成功利用 Tuckerman 舍入在单精度平方根函数中提供正确舍入的结果。
#include <stdio.h>
#include <stdlib.h>
#include <fenv.h>
#include <math.h>
#pragma STDC FENV_ACCESS ON
float mul_fp32_rz (float a, float b)
{
float r;
int orig_rnd = fegetround();
fesetround (FE_TOWARDZERO);
r = a * b;
fesetround (orig_rnd);
return r;
}
float my_sqrtf (float a)
{
float b, r, v, w, p, s;
int e, t, f;
if ((a <= 0.0f) || isinff (a) || isnanf (a)) {
if (a < 0.0f) {
r = 0.0f / 0.0f;
} else {
r = a + a;
}
} else {
/* compute exponent adjustments */
b = frexpf (a, &e);
t = e - 2*512;
f = t / 2;
t = t - 2 * f;
f = f + 512;
/* map argument into the primary approximation interval [0.25,1) */
b = ldexpf (b, t);
/* initial approximation to reciprocal square root */
r = -6.10005470e+0f;
r = r * b + 2.28990124e+1f;
r = r * b - 3.48110069e+1f;
r = r * b + 2.76135244e+1f;
r = r * b - 1.24472151e+1f;
r = r * b + 3.84509158e+0f;
/* round rsqrt approximation to 11 bits */
r = rintf (r * 2048.0f);
r = r * (1.0f / 2048.0f);
/* Use A. Schoenhage's coupled iteration for the square root */
v = 0.5f * r;
w = b * r;
w = (w * -w + b) * v + w;
v = (r * -w + 1.0f) * v + v;
w = (w * -w + b) * v + w;
/* Tuckerman rounding: mul_rz (w, w-ulp) < b <= mul_rz (w, w+ulp) */
p = nextafterf (w, 0.0f);
s = nextafterf (w, 2.0f);
if (b <= mul_fp32_rz (w, p)) {
w = p;
} else if (b > mul_fp32_rz (w, s)) {
w = s;
}
/* map back from primary approximation interval by jamming exponent */
r = ldexpf (w, f);
}
return r;
}
int main (void)
{
volatile union {
float f;
unsigned int i;
} arg, res, ref;
arg.i = 0;
do {
res.f = my_sqrtf (arg.f);
ref.f = sqrtf (arg.f);
if (res.i != ref.i) {
printf ("!!!! error @ arg=%08x: res=%08x ref=%08x\n",
arg.i, res.i, ref.i);
break;
}
arg.i++;
} while (arg.i);
return EXIT_SUCCESS;
}