将给定的尾数、指数和符号转换为浮点数?
Converting given mantissa, exponent, and sign to float?
我得到了尾数、指数和符号,我必须将其转换为相应的浮点数。我使用 22 位作为尾数,9 位作为指数,1 位作为符号。
我在概念上知道如何将它们转换成浮点数:首先把指数调整回原位,然后把得到的数字转换回浮点数,但我在用 C 实现时遇到了麻烦。我看到了一个相关的问题,但我看不懂其中的代码,也不确定那里的答案是否正确。谁能给我指出正确的方向?我需要用 C 来编写代码。
编辑:我取得了一些进展:首先将尾数转换为二进制,然后调整二进制小数点的位置,最后把带小数点的二进制数转换回实际的浮点数。我的转换函数基于这两个 GeeksforGeeks 页面 (one, two),但进行所有这些二进制转换似乎既冗长又困难。上面链接中的答案显然是通过使用 >> 运算符,只用很少的几步就完成了,但我不明白它是如何得到浮点数的。
链接中的问题针对的是 C++ 而不是 C。在 C 中,要在不同数据类型之间转换并保留其位模式,可以使用联合 (union)。例如:
/* Overlay that lets the same 32 bits be accessed either as an unsigned
   integer or as a float. */
union float_or_int {
    uint32_t i;
    float f;
};

/* Pack a sign bit, a 9-bit exponent field and a 22-bit mantissa field into
   one 32-bit word (sign in bit 31, exponent in bits 30..22, mantissa in
   bits 21..0) and return that bit pattern reinterpreted as a float.

   mantissa  Raw mantissa field; only the low 22 bits are used.
   exponent  Raw exponent field; only the low 9 bits are used.
   sign      Sign bit; only the lowest bit is used.

   Each field is masked to its width so that an oversized argument cannot
   spill into a neighbouring field.

   NOTE(review): a float is usually IEEE-754 binary32 (8 exponent bits,
   23 significand bits), so the returned value is numerically equal to the
   number encoded in the 1/9/22 format only if the platform's float uses
   that same layout -- confirm that this is what the caller wants.
*/
float to_float(uint32_t mantissa, uint32_t exponent, uint32_t sign)
{
    union float_or_int result;

    result.i = ((sign & 0x1u) << 31)
             | ((exponent & 0x1ffu) << 22)
             | (mantissa & 0x3fffffu);
    return result.f;
}
抱歉打字错误,我已经有一段时间没有用 C 编写代码了
这是一个程序,其中有解释解码的注释:
#include <inttypes.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
/* Parameters of the floating-point encoding being decoded.  The derived
   values are computed from the two field widths so that they stay
   consistent if a width is changed. */
enum
{
    SignificandBits = 22,   // Width of the significand field, in bits.
    ExponentBits    = 9,    // Width of the exponent field, in bits.

    // Largest exponent field value (all bits set).
    ExponentMaximum = (1 << ExponentBits) - 1,

    // Bias subtracted from the exponent field of a normal number.
    ExponentBias    = (1 << (ExponentBits - 1)) - 1,
};
/* Given the contents of the sign, exponent, and significand fields that
encode a floating-point number following IEEE-754 patterns for binary
floating-point, return the encoded number.
"double" is used for the return type as not all values represented by the
sample format (9 exponent bits, 22 significand bits) will fit in a "float"
when it is the commonly used IEEE-754 binary32 format.
*/
double DecodeCustomFloat(
unsigned SignField, uint32_t ExponentField, uint32_t SignificandField)
{
/* We are given a significand field as an integer, but it is used as the
value of a binary numeral consisting of “.” followed by the significand
bits. That value equals the integer divided by 2 to the power of the
number of significand bits. Define a constant with that value to be
used for converting the significand field to represented value.
*/
static const double SignificandRatio = (uint32_t) 1 << SignificandBits;
/* Decode the sign field:
If the sign bit is 0, the sign is +, for which we use +1.
If the sign bit is 1, the sign is -, for which we use -1.
*/
double Sign = SignField ? -1. : +1.;
// Dispatch to handle the different categories of exponent field.
switch (ExponentField)
{
/* When the exponent field is all ones, the value represented is a
NaN or infinity:
If the significand field is zero, it is an infinity.
Otherwise, it is a NaN. In either case, the sign should be
preserved.
Note this is a simple demonstration implementation that does not
preserve the bits in the significand field of a NaN -- we just
return the generic NAN without attempting to set its significand
bits.
*/
case ExponentMaximum:
{
return Sign * (SignificandField ? NAN : INFINITY);
}
/* When the exponent field is not all zeros or all ones, the value
represented is a normal number:
The exponent represented is ExponentField - ExponentBias, and
the significand represented is the value given by the binary
numeral “1.” followed by the significand bits.
*/
default:
{
int Exponent = ExponentField - ExponentBias;
double Significand = 1 + SignificandField / SignificandRatio;
return Sign * ldexp(Significand, Exponent);
}
/* When the exponent field is zero, the value represented is subnormal:
The exponent represented is 1 - ExponentBias, and the
significand represented is the value given by the binary
numeral “0.” followed by the significand bits.
*/
case 0:
{
int Exponent = 1 - ExponentBias;
double Significand = 0 + SignificandField / SignificandRatio;
return Sign * ldexp(Significand, Exponent);
}
}
}
/* Test that a given set of fields decodes to the expected value and
   print the fields and the decoded value.

   SignField, ExponentField and SignificandField are forwarded, in that
   order, to DecodeCustomFloat.  Expected is the value the fields should
   decode to.

   On a mismatch a diagnostic is written to stderr and the program exits
   with EXIT_FAILURE.  Two NaNs are accepted as matching each other; any
   other pair must compare equal and have the same sign, so that -0 is not
   mistaken for +0.
*/
static void Demonstrate(
    unsigned SignField, uint32_t ExponentField, uint32_t SignificandField,
    double Expected)
{
    double Observed
        = DecodeCustomFloat(SignField, ExponentField, SignificandField);

    // A NaN never compares equal to anything, so it is recognized separately.
    int BothNaN = isnan(Observed) && isnan(Expected);

    // "==" treats -0 and +0 as equal, so the sign bits are compared as well.
    int SameValue = Observed == Expected
        && !signbit(Observed) == !signbit(Expected);

    if (!BothNaN && !SameValue)
    {
        fprintf(stderr,
            "Error, expected (%u, %" PRIu32 ", %" PRIu32 ") to represent "
            "%g (hexadecimal %a) but got %g (hexadecimal %a).\n",
            SignField, ExponentField, SignificandField,
            Expected, Expected,
            Observed, Observed);
        exit(EXIT_FAILURE);
    }

    printf(
        "(%u, %" PRIu32 ", %" PRIu32 ") represents %g (hexadecimal %a).\n",
        SignField, ExponentField, SignificandField, Observed, Observed);
}
/* Run the decoder over a fixed list of field triples, in order.  Each call
   to Demonstrate prints the decoded value or terminates the program on a
   mismatch. */
int main(void)
{
    /* Each row holds the three fields in the order Demonstrate takes them
       (sign, exponent, significand), followed by the expected value. */
    const struct
    {
        unsigned Sign;
        uint32_t Exponent;
        uint32_t Significand;
        double   Expected;
    } Cases[] =
    {
        // Zeros.
        { 0,   0, 0,        +0. },
        { 1,   0, 0,        -0. },
        // Exponent field equal to the bias, significand field zero.
        { 0, 255, 0,        +1. },
        { 1, 255, 0,        -1. },
        // Exponent field all ones: infinities and NaNs.
        { 0, 511, 0,        +INFINITY },
        { 1, 511, 0,        -INFINITY },
        { 0, 511, 1,        +NAN },
        { 1, 511, 1,        -NAN },
        // Exponent field zero with the lowest significand bit set.
        { 0,   0, 1,        +0x1p-276 },
        { 1,   0, 1,        -0x1p-276 },
        // One unit in the last place away from 1.
        { 0, 255, 1,        +1. + 0x1p-22 },
        { 1, 255, 1,        -1. - 0x1p-22 },
        // Exponent field one, significand field zero.
        { 0,   1, 0,        +0x1p-254 },
        { 1,   1, 0,        -0x1p-254 },
        // Largest exponent field below all ones, significand field all ones.
        { 0, 510, 0x3fffff, +0x1p256 - 0x1p233 },
        { 1, 510, 0x3fffff, -0x1p256 + 0x1p233 },
    };

    for (size_t Index = 0; Index < sizeof Cases / sizeof Cases[0]; ++Index)
    {
        Demonstrate(
            Cases[Index].Sign,
            Cases[Index].Exponent,
            Cases[Index].Significand,
            Cases[Index].Expected);
    }

    return 0;
}
一些注意事项:
ldexp 是一个标准的 C 库函数。ldexp(x, e) 返回 x 乘以 2 的 e 次方。
uint32_t 是一个无符号的 32 位整数类型。它在 stdint.h 中定义。
"%" PRIu32 提供用于 uint32_t 的 printf 格式转换规范。
下面是一个简单的程序,用于说明如何将 float 分解成各个组成部分,以及如何由(符号、指数、尾数)三元组组合出 float 值:
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
/* Print the low n bits of "bits" to stdout, most significant bit first.
   Every bit is followed by '.', except the last one, which is followed
   by '|'.

   bits  Value whose bits are printed.
   n     Number of low-order bits to print.  Values above 32 are treated
         as 32, and nothing is printed when n is zero or negative, so the
         shift count always stays within the width of a uint32_t.
*/
void dumpbits(uint32_t bits, int n) {
    if (n > 32)
        n = 32;

    while (n-- > 0) {
        /* The bit is converted to int to match the %d conversion.
           !n is 1 only for the final bit, which selects '|'. */
        printf("%d%c", (int)((bits >> n) & 1), ".|"[!n]);
    }
}
/* Split a float into its sign, exponent and mantissa fields, or build a
   float from such fields, using the 1/8/23-bit layout.

   With exactly one argument, the argument is parsed with strtof and its
   fields are printed, followed by a bit-by-bit diagram.

   Otherwise the arguments are taken as sign, exponent and mantissa (parsed
   by strtol with base auto-detection); any field that is not supplied keeps
   its default of 0, 127 and 0 respectively.  Each supplied field is reduced
   to its width (1, 8 and 23 bits) so that an oversized value cannot
   overwrite a neighbouring field, and the resulting float is printed.

   NOTE(review): the bit manipulation assumes that float is a 32-bit
   IEEE-754 binary32 value -- confirm for the target platform.
*/
int main(int argc, char *argv[]) {
    unsigned sign = 0;
    unsigned exponent = 127;
    unsigned long mantissa = 0;
    /* The union gives access to the bit pattern of the float. */
    union {
        float f32;
        uint32_t u32;
    } u;

    if (argc == 2) {
        /* Decompose: float -> (sign, exponent, mantissa). */
        u.f32 = strtof(argv[1], NULL);
        sign = u.u32 >> 31;
        exponent = (u.u32 >> 23) & 0xff;
        mantissa = (u.u32) & 0x7fffff;
        printf("%.8g -> sign:%u, exponent:%u, mantissa:0x%06lx\n",
               (double)u.f32, sign, exponent, mantissa);
        printf("+s+----exponent---+------------------mantissa-------------------+\n");
        printf("|");
        dumpbits(sign, 1);
        dumpbits(exponent, 8);
        dumpbits(mantissa, 23);
        printf("\n");
        printf("+-+---------------+---------------------------------------------+\n");
    } else {
        /* Compose: (sign, exponent, mantissa) -> float.  The parsed values
           are converted to unsigned before masking so that negative input
           is reduced to its low bits in a well-defined way. */
        if (argc > 1)
            sign = (unsigned)((unsigned long)strtol(argv[1], NULL, 0) & 0x1UL);
        if (argc > 2)
            exponent = (unsigned)((unsigned long)strtol(argv[2], NULL, 0) & 0xffUL);
        if (argc > 3)
            mantissa = (unsigned long)strtol(argv[3], NULL, 0) & 0x7fffffUL;

        u.u32 = ((uint32_t)sign << 31)
              | ((uint32_t)exponent << 23)
              | (uint32_t)mantissa;
        printf("sign:%u, exponent:%u, mantissa:0x%06lx -> %.8g\n",
               sign, exponent, mantissa, (double)u.f32);
    }
    return 0;
}
请注意,与您的作业要求不同,这里尾数的大小为 23 位,指数为 8 位,对应于 IEEE 754 标准的 32 位(即单精度)浮点格式。请参阅维基百科上的 Single-precision floating-point format 条目。
我得到了尾数、指数和符号,我必须将其转换为相应的浮点数。我使用 22 位作为尾数,9 位作为指数,1 位作为符号。
我在概念上知道如何将它们转换成浮点数:首先把指数调整回原位,然后把得到的数字转换回浮点数,但我在用 C 实现时遇到了麻烦。我看到了一个相关的问题,但我看不懂其中的代码。
编辑:我取得了一些进展:首先将尾数转换为二进制,然后调整二进制小数点的位置,最后把带小数点的二进制数转换回实际的浮点数。我的转换函数基于这两个 GeeksforGeeks 页面 (one, two),但进行所有这些二进制转换似乎既冗长又困难。上面链接中的答案显然是通过使用 >> 运算符,只用很少的几步就完成了,但我不明白它是如何得到浮点数的。
链接中的问题针对的是 C++ 而不是 C。在 C 中,要在不同数据类型之间转换并保留其位模式,可以使用联合 (union)。例如:
/* Overlay that lets the same 32 bits be accessed either as an unsigned
   integer or as a float. */
union float_or_int {
    uint32_t i;
    float f;
};

/* Pack a sign bit, a 9-bit exponent field and a 22-bit mantissa field into
   one 32-bit word (sign in bit 31, exponent in bits 30..22, mantissa in
   bits 21..0) and return that bit pattern reinterpreted as a float.

   mantissa  Raw mantissa field; only the low 22 bits are used.
   exponent  Raw exponent field; only the low 9 bits are used.
   sign      Sign bit; only the lowest bit is used.

   Each field is masked to its width so that an oversized argument cannot
   spill into a neighbouring field.

   NOTE(review): a float is usually IEEE-754 binary32 (8 exponent bits,
   23 significand bits), so the returned value is numerically equal to the
   number encoded in the 1/9/22 format only if the platform's float uses
   that same layout -- confirm that this is what the caller wants.
*/
float to_float(uint32_t mantissa, uint32_t exponent, uint32_t sign)
{
    union float_or_int result;

    result.i = ((sign & 0x1u) << 31)
             | ((exponent & 0x1ffu) << 22)
             | (mantissa & 0x3fffffu);
    return result.f;
}
抱歉打字错误,我已经有一段时间没有用 C 编写代码了
这是一个程序,其中有解释解码的注释:
#include <inttypes.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
/* Parameters of the floating-point encoding being decoded.  The derived
   values are computed from the two field widths so that they stay
   consistent if a width is changed. */
enum
{
    SignificandBits = 22,   // Width of the significand field, in bits.
    ExponentBits    = 9,    // Width of the exponent field, in bits.

    // Largest exponent field value (all bits set).
    ExponentMaximum = (1 << ExponentBits) - 1,

    // Bias subtracted from the exponent field of a normal number.
    ExponentBias    = (1 << (ExponentBits - 1)) - 1,
};
/* Given the contents of the sign, exponent, and significand fields that
encode a floating-point number following IEEE-754 patterns for binary
floating-point, return the encoded number.
"double" is used for the return type as not all values represented by the
sample format (9 exponent bits, 22 significand bits) will fit in a "float"
when it is the commonly used IEEE-754 binary32 format.
*/
double DecodeCustomFloat(
unsigned SignField, uint32_t ExponentField, uint32_t SignificandField)
{
/* We are given a significand field as an integer, but it is used as the
value of a binary numeral consisting of “.” followed by the significand
bits. That value equals the integer divided by 2 to the power of the
number of significand bits. Define a constant with that value to be
used for converting the significand field to represented value.
*/
static const double SignificandRatio = (uint32_t) 1 << SignificandBits;
/* Decode the sign field:
If the sign bit is 0, the sign is +, for which we use +1.
If the sign bit is 1, the sign is -, for which we use -1.
*/
double Sign = SignField ? -1. : +1.;
// Dispatch to handle the different categories of exponent field.
switch (ExponentField)
{
/* When the exponent field is all ones, the value represented is a
NaN or infinity:
If the significand field is zero, it is an infinity.
Otherwise, it is a NaN. In either case, the sign should be
preserved.
Note this is a simple demonstration implementation that does not
preserve the bits in the significand field of a NaN -- we just
return the generic NAN without attempting to set its significand
bits.
*/
case ExponentMaximum:
{
return Sign * (SignificandField ? NAN : INFINITY);
}
/* When the exponent field is not all zeros or all ones, the value
represented is a normal number:
The exponent represented is ExponentField - ExponentBias, and
the significand represented is the value given by the binary
numeral “1.” followed by the significand bits.
*/
default:
{
int Exponent = ExponentField - ExponentBias;
double Significand = 1 + SignificandField / SignificandRatio;
return Sign * ldexp(Significand, Exponent);
}
/* When the exponent field is zero, the value represented is subnormal:
The exponent represented is 1 - ExponentBias, and the
significand represented is the value given by the binary
numeral “0.” followed by the significand bits.
*/
case 0:
{
int Exponent = 1 - ExponentBias;
double Significand = 0 + SignificandField / SignificandRatio;
return Sign * ldexp(Significand, Exponent);
}
}
}
/* Test that a given set of fields decodes to the expected value and
   print the fields and the decoded value.

   SignField, ExponentField and SignificandField are forwarded, in that
   order, to DecodeCustomFloat.  Expected is the value the fields should
   decode to.

   On a mismatch a diagnostic is written to stderr and the program exits
   with EXIT_FAILURE.  Two NaNs are accepted as matching each other; any
   other pair must compare equal and have the same sign, so that -0 is not
   mistaken for +0.
*/
static void Demonstrate(
    unsigned SignField, uint32_t ExponentField, uint32_t SignificandField,
    double Expected)
{
    double Observed
        = DecodeCustomFloat(SignField, ExponentField, SignificandField);

    // A NaN never compares equal to anything, so it is recognized separately.
    int BothNaN = isnan(Observed) && isnan(Expected);

    // "==" treats -0 and +0 as equal, so the sign bits are compared as well.
    int SameValue = Observed == Expected
        && !signbit(Observed) == !signbit(Expected);

    if (!BothNaN && !SameValue)
    {
        fprintf(stderr,
            "Error, expected (%u, %" PRIu32 ", %" PRIu32 ") to represent "
            "%g (hexadecimal %a) but got %g (hexadecimal %a).\n",
            SignField, ExponentField, SignificandField,
            Expected, Expected,
            Observed, Observed);
        exit(EXIT_FAILURE);
    }

    printf(
        "(%u, %" PRIu32 ", %" PRIu32 ") represents %g (hexadecimal %a).\n",
        SignField, ExponentField, SignificandField, Observed, Observed);
}
/* Run the decoder over a fixed list of field triples, in order.  Each call
   to Demonstrate prints the decoded value or terminates the program on a
   mismatch. */
int main(void)
{
    /* Each row holds the three fields in the order Demonstrate takes them
       (sign, exponent, significand), followed by the expected value. */
    const struct
    {
        unsigned Sign;
        uint32_t Exponent;
        uint32_t Significand;
        double   Expected;
    } Cases[] =
    {
        // Zeros.
        { 0,   0, 0,        +0. },
        { 1,   0, 0,        -0. },
        // Exponent field equal to the bias, significand field zero.
        { 0, 255, 0,        +1. },
        { 1, 255, 0,        -1. },
        // Exponent field all ones: infinities and NaNs.
        { 0, 511, 0,        +INFINITY },
        { 1, 511, 0,        -INFINITY },
        { 0, 511, 1,        +NAN },
        { 1, 511, 1,        -NAN },
        // Exponent field zero with the lowest significand bit set.
        { 0,   0, 1,        +0x1p-276 },
        { 1,   0, 1,        -0x1p-276 },
        // One unit in the last place away from 1.
        { 0, 255, 1,        +1. + 0x1p-22 },
        { 1, 255, 1,        -1. - 0x1p-22 },
        // Exponent field one, significand field zero.
        { 0,   1, 0,        +0x1p-254 },
        { 1,   1, 0,        -0x1p-254 },
        // Largest exponent field below all ones, significand field all ones.
        { 0, 510, 0x3fffff, +0x1p256 - 0x1p233 },
        { 1, 510, 0x3fffff, -0x1p256 + 0x1p233 },
    };

    for (size_t Index = 0; Index < sizeof Cases / sizeof Cases[0]; ++Index)
    {
        Demonstrate(
            Cases[Index].Sign,
            Cases[Index].Exponent,
            Cases[Index].Significand,
            Cases[Index].Expected);
    }

    return 0;
}
一些注意事项:
ldexp 是一个标准的 C 库函数。ldexp(x, e) 返回 x 乘以 2 的 e 次方。
uint32_t 是一个无符号的 32 位整数类型。它在 stdint.h 中定义。
"%" PRIu32 提供用于 uint32_t 的 printf 格式转换规范。
下面是一个简单的程序,用于说明如何将 float 分解成各个组成部分,以及如何由(符号、指数、尾数)三元组组合出 float 值:
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
/* Print the low n bits of "bits" to stdout, most significant bit first.
   Every bit is followed by '.', except the last one, which is followed
   by '|'.

   bits  Value whose bits are printed.
   n     Number of low-order bits to print.  Values above 32 are treated
         as 32, and nothing is printed when n is zero or negative, so the
         shift count always stays within the width of a uint32_t.
*/
void dumpbits(uint32_t bits, int n) {
    if (n > 32)
        n = 32;

    while (n-- > 0) {
        /* The bit is converted to int to match the %d conversion.
           !n is 1 only for the final bit, which selects '|'. */
        printf("%d%c", (int)((bits >> n) & 1), ".|"[!n]);
    }
}
/* Split a float into its sign, exponent and mantissa fields, or build a
   float from such fields, using the 1/8/23-bit layout.

   With exactly one argument, the argument is parsed with strtof and its
   fields are printed, followed by a bit-by-bit diagram.

   Otherwise the arguments are taken as sign, exponent and mantissa (parsed
   by strtol with base auto-detection); any field that is not supplied keeps
   its default of 0, 127 and 0 respectively.  Each supplied field is reduced
   to its width (1, 8 and 23 bits) so that an oversized value cannot
   overwrite a neighbouring field, and the resulting float is printed.

   NOTE(review): the bit manipulation assumes that float is a 32-bit
   IEEE-754 binary32 value -- confirm for the target platform.
*/
int main(int argc, char *argv[]) {
    unsigned sign = 0;
    unsigned exponent = 127;
    unsigned long mantissa = 0;
    /* The union gives access to the bit pattern of the float. */
    union {
        float f32;
        uint32_t u32;
    } u;

    if (argc == 2) {
        /* Decompose: float -> (sign, exponent, mantissa). */
        u.f32 = strtof(argv[1], NULL);
        sign = u.u32 >> 31;
        exponent = (u.u32 >> 23) & 0xff;
        mantissa = (u.u32) & 0x7fffff;
        printf("%.8g -> sign:%u, exponent:%u, mantissa:0x%06lx\n",
               (double)u.f32, sign, exponent, mantissa);
        printf("+s+----exponent---+------------------mantissa-------------------+\n");
        printf("|");
        dumpbits(sign, 1);
        dumpbits(exponent, 8);
        dumpbits(mantissa, 23);
        printf("\n");
        printf("+-+---------------+---------------------------------------------+\n");
    } else {
        /* Compose: (sign, exponent, mantissa) -> float.  The parsed values
           are converted to unsigned before masking so that negative input
           is reduced to its low bits in a well-defined way. */
        if (argc > 1)
            sign = (unsigned)((unsigned long)strtol(argv[1], NULL, 0) & 0x1UL);
        if (argc > 2)
            exponent = (unsigned)((unsigned long)strtol(argv[2], NULL, 0) & 0xffUL);
        if (argc > 3)
            mantissa = (unsigned long)strtol(argv[3], NULL, 0) & 0x7fffffUL;

        u.u32 = ((uint32_t)sign << 31)
              | ((uint32_t)exponent << 23)
              | (uint32_t)mantissa;
        printf("sign:%u, exponent:%u, mantissa:0x%06lx -> %.8g\n",
               sign, exponent, mantissa, (double)u.f32);
    }
    return 0;
}
请注意,与您的作业要求不同,这里尾数的大小为 23 位,指数为 8 位,对应于 IEEE 754 标准的 32 位(即单精度)浮点格式。请参阅维基百科上的 Single-precision floating-point format 条目。