IEEE 754 位操作舍入误差
IEEE 754 Bit manipulation Rounding Error
在不使用类型转换或库功能的情况下,我必须通过位操作将整数转换为浮点数。下面是我目前正在处理的代码。它基于我在 中找到的代码。我 运行 遇到的问题涉及 IEEE 754 中的舍入标准。更具体地说,我的代码向 0 舍入,但它应该向偶数舍入。我需要做哪些改变?
unsigned inttofloat(int x) {
int bias = 127;
int man;
int exp = bias + 31; //8-bit exp
int count = 0;
int tmin = 1 << 31;
int manpattern = 0x7FFFFF;
int sign = 0;
if (x == 0){
return 0;
}
else if (x == tmin){
return 0xcf << 24;
}
if (x < 0) {
sign = tmin;
x = ~x + 1; // makes x negative so that we can accurately represent it later on.
}
while((x & tmin) == 0){
exp--;
x <<= 1;
count++;
}
exp <<= 23;
man = (x >> 8) & manpattern;
return (sign | exp | man);
}
到 round 最接近 - 关系到偶数,将 (x >> 8)
替换为:
unsigned u = x; // avoid any potential signed shifting issues
unsigned lease_significant_bit = (u >> 8) & 1;
unsigned round_bit = (u >> 7) & 1; // Most significant bit shifted out
unsigned sticky_bit_flag = !!(u & 0x7F); // All other bits shifts out
// OP's shifted answer.
u = (u >> 8):
// round away if more than half-way or
// if at half-way and number is odd
u += (round_bit & sticky_bit_flag) | (round_bit & lease_significant_bit);
留给 OP 来简化
请注意,u += 1
可能会一直传播并需要指数增长。
在不使用类型转换或库功能的情况下,我必须通过位操作将整数转换为浮点数。下面是我目前正在处理的代码。它基于我在
unsigned inttofloat(int x) {
int bias = 127;
int man;
int exp = bias + 31; //8-bit exp
int count = 0;
int tmin = 1 << 31;
int manpattern = 0x7FFFFF;
int sign = 0;
if (x == 0){
return 0;
}
else if (x == tmin){
return 0xcf << 24;
}
if (x < 0) {
sign = tmin;
x = ~x + 1; // makes x negative so that we can accurately represent it later on.
}
while((x & tmin) == 0){
exp--;
x <<= 1;
count++;
}
exp <<= 23;
man = (x >> 8) & manpattern;
return (sign | exp | man);
}
到 round 最接近 - 关系到偶数,将 (x >> 8)
替换为:
unsigned u = x; // avoid any potential signed shifting issues
unsigned lease_significant_bit = (u >> 8) & 1;
unsigned round_bit = (u >> 7) & 1; // Most significant bit shifted out
unsigned sticky_bit_flag = !!(u & 0x7F); // All other bits shifts out
// OP's shifted answer.
u = (u >> 8):
// round away if more than half-way or
// if at half-way and number is odd
u += (round_bit & sticky_bit_flag) | (round_bit & lease_significant_bit);
留给 OP 来简化
请注意,u += 1
可能会一直传播并需要指数增长。