如何在舍入模式下将double转换为unsigned long long != FE_TOWARDZERO?
How to convert double to unsigned long long under rounding mode != FE_TOWARDZERO?
如何在舍入模式下将double转换为unsigned long long != FE_TOWARDZERO
?
补充:为什么没有ullrint
功能?
标准 C99 数学函数 trunc()
、ceil()
、floor()
和 rint()
(具有适当设置的舍入模式)可用于对 double
值使用所需的舍入模式转换为下一个整数值,然后通过强制转换转换为 unsigned long long
。请注意,足够大的 double
值(幅度 >= 253 的值)保证是整数,因此在这种方法中不存在双舍入问题。
或者(例如,当使用仅支持不提供这些标准数学函数的旧 ISO-C 标准的工具链时),可以通过一些简单的位操作轻松执行转换。以下示例代码演示了这两种方法。它假定 uint64_t
和 unsigned long long
在底层是相同的,并且 double
映射到 IEEE-754 binary64
.
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <fenv.h>
#include <math.h>
#define RN (1)
#define RZ (2)
#define RU (3)
#define RD (4)
#define TEST_RND_MODE (RD)
uint64_t double_as_uint64 (double a)
{
uint64_t r;
memcpy (&r, &a, sizeof r);
return r;
}
uint64_t double_to_uint64 (double a, int rnd_mode)
{
const uint64_t uint64_nan = 0x8000000000000000ULL;
const uint64_t uint64_msb = 0x8000000000000000ULL;
const int dbl_expo_bias = 1023;
const int dbl_expo_bits = 11;
const int dbl_expo_mask = ((1 << dbl_expo_bits) - 1);
const int dbl_mant_bits = 52; // stored
uint64_t t = 0ULL;
uint64_t ai = double_as_uint64 (a);
uint64_t r;
int shift;
/* special case handling: season to taste */
if (isnan (a)) return uint64_nan;
if (a >= 0x1.0p64) return 0ULL;
if (a <= 0.0) return 0ULL;
shift = (int)(dbl_expo_bias + 63 - ((ai >> dbl_mant_bits) & dbl_expo_mask));
r = ((ai << dbl_expo_bits) | uint64_msb);
if (shift >= 64) {
t = r >> (shift > 64);
r = 0;
} else if (shift) {
t = r << (64 - shift);
r = r >> shift;
}
if ((rnd_mode == FE_TONEAREST) && (t >= uint64_msb)) {
r += (t == uint64_msb) ? (r & 1) : 1;
} else if ((rnd_mode == FE_UPWARD) && t) {
r++;
}
return r;
}
double uint64_as_double (uint64_t a)
{
double r;
memcpy (&r, &a, sizeof r);
return r;
}
static uint64_t kiss64_x = 1234567890987654321ULL;
static uint64_t kiss64_c = 123456123456123456ULL;
static uint64_t kiss64_y = 362436362436362436ULL;
static uint64_t kiss64_z = 1066149217761810ULL;
static uint64_t kiss64_t;
#define MWC64 (kiss64_t = (kiss64_x << 58) + kiss64_c, \
kiss64_c = (kiss64_x >> 6), kiss64_x += kiss64_t, \
kiss64_c += (kiss64_x < kiss64_t), kiss64_x)
#define XSH64 (kiss64_y ^= (kiss64_y << 13), kiss64_y ^= (kiss64_y >> 17), \
kiss64_y ^= (kiss64_y << 43))
#define CNG64 (kiss64_z = 6906969069ULL * kiss64_z + 1234567ULL)
#define KISS64 (MWC64 + XSH64 + CNG64)
int main (void)
{
double x;
uint64_t res, ref;
do {
x = fabs (uint64_as_double (KISS64));
#if TEST_RND_MODE == RZ
res = double_to_uint64 (x, FE_TOWARDZERO);
ref = (uint64_t)trunc(x);
#elif TEST_RND_MODE == RN
res = double_to_uint64 (x, FE_TONEAREST);
ref = (uint64_t)rint(x); // assumes current rounding mode is FE_TONEAREST!
#elif TEST_RND_MODE == RU
res = double_to_uint64 (x, FE_UPWARD);
ref = (uint64_t)ceil(x);
#elif TEST_RND_MODE == RD
res = double_to_uint64 (x, FE_DOWNWARD);
ref = (uint64_t)floor(x);
#else
#error unsupported TEST_RND_MODE
#endif
if (res != ref) {
printf ("err @ % 22.13a: res=%016llx ref=%016llx\n", x, res, ref);
return EXIT_FAILURE;
}
} while (1);
return EXIT_SUCCESS;
}
如何在舍入模式下将double转换为unsigned long long != FE_TOWARDZERO
?
补充:为什么没有ullrint
功能?
标准 C99 数学函数 trunc()
、ceil()
、floor()
和 rint()
(具有适当设置的舍入模式)可用于对 double
值使用所需的舍入模式转换为下一个整数值,然后通过强制转换转换为 unsigned long long
。请注意,足够大的 double
值(幅度 >= 253 的值)保证是整数,因此在这种方法中不存在双舍入问题。
或者(例如,当使用仅支持不提供这些标准数学函数的旧 ISO-C 标准的工具链时),可以通过一些简单的位操作轻松执行转换。以下示例代码演示了这两种方法。它假定 uint64_t
和 unsigned long long
在底层是相同的,并且 double
映射到 IEEE-754 binary64
.
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <fenv.h>
#include <math.h>
#define RN (1)
#define RZ (2)
#define RU (3)
#define RD (4)
#define TEST_RND_MODE (RD)
uint64_t double_as_uint64 (double a)
{
uint64_t r;
memcpy (&r, &a, sizeof r);
return r;
}
uint64_t double_to_uint64 (double a, int rnd_mode)
{
const uint64_t uint64_nan = 0x8000000000000000ULL;
const uint64_t uint64_msb = 0x8000000000000000ULL;
const int dbl_expo_bias = 1023;
const int dbl_expo_bits = 11;
const int dbl_expo_mask = ((1 << dbl_expo_bits) - 1);
const int dbl_mant_bits = 52; // stored
uint64_t t = 0ULL;
uint64_t ai = double_as_uint64 (a);
uint64_t r;
int shift;
/* special case handling: season to taste */
if (isnan (a)) return uint64_nan;
if (a >= 0x1.0p64) return 0ULL;
if (a <= 0.0) return 0ULL;
shift = (int)(dbl_expo_bias + 63 - ((ai >> dbl_mant_bits) & dbl_expo_mask));
r = ((ai << dbl_expo_bits) | uint64_msb);
if (shift >= 64) {
t = r >> (shift > 64);
r = 0;
} else if (shift) {
t = r << (64 - shift);
r = r >> shift;
}
if ((rnd_mode == FE_TONEAREST) && (t >= uint64_msb)) {
r += (t == uint64_msb) ? (r & 1) : 1;
} else if ((rnd_mode == FE_UPWARD) && t) {
r++;
}
return r;
}
double uint64_as_double (uint64_t a)
{
double r;
memcpy (&r, &a, sizeof r);
return r;
}
static uint64_t kiss64_x = 1234567890987654321ULL;
static uint64_t kiss64_c = 123456123456123456ULL;
static uint64_t kiss64_y = 362436362436362436ULL;
static uint64_t kiss64_z = 1066149217761810ULL;
static uint64_t kiss64_t;
#define MWC64 (kiss64_t = (kiss64_x << 58) + kiss64_c, \
kiss64_c = (kiss64_x >> 6), kiss64_x += kiss64_t, \
kiss64_c += (kiss64_x < kiss64_t), kiss64_x)
#define XSH64 (kiss64_y ^= (kiss64_y << 13), kiss64_y ^= (kiss64_y >> 17), \
kiss64_y ^= (kiss64_y << 43))
#define CNG64 (kiss64_z = 6906969069ULL * kiss64_z + 1234567ULL)
#define KISS64 (MWC64 + XSH64 + CNG64)
int main (void)
{
double x;
uint64_t res, ref;
do {
x = fabs (uint64_as_double (KISS64));
#if TEST_RND_MODE == RZ
res = double_to_uint64 (x, FE_TOWARDZERO);
ref = (uint64_t)trunc(x);
#elif TEST_RND_MODE == RN
res = double_to_uint64 (x, FE_TONEAREST);
ref = (uint64_t)rint(x); // assumes current rounding mode is FE_TONEAREST!
#elif TEST_RND_MODE == RU
res = double_to_uint64 (x, FE_UPWARD);
ref = (uint64_t)ceil(x);
#elif TEST_RND_MODE == RD
res = double_to_uint64 (x, FE_DOWNWARD);
ref = (uint64_t)floor(x);
#else
#error unsupported TEST_RND_MODE
#endif
if (res != ref) {
printf ("err @ % 22.13a: res=%016llx ref=%016llx\n", x, res, ref);
return EXIT_FAILURE;
}
} while (1);
return EXIT_SUCCESS;
}