std::num_put 由于从 float 自动转换为 double 而导致的 nan-boxing 问题
std::num_put issue with nan-boxing due to auto-cast from float to double
我正在使用 to extend nan values with some extra info and 修改 std::cout
行为并显示此额外信息。
这里是定义函数的代码 NumPut
class:
#include <iostream>
#include <assert.h>
#include <limits>
#include <bitset>
#include <cmath>
#include <locale>
#include <ostream>
#include <sstream>
template <typename T>
void showValue( T val, const std::string& what )
{
union uT {
T d;
unsigned long long u;
};
uT ud;
ud.d = val;
std::bitset<sizeof(T) * 8> b(ud.u);
std::cout << val << " (" << what << "): " << b.to_string() << std::endl;
}
template <typename T>
T customizeNaN( T value, char mask )
{
T res = value;
char* ptr = (char*) &res;
assert( ptr[0] == 0 );
ptr[0] |= mask;
return res;
}
template <typename T>
bool isCustomNaN( T value, char mask )
{
char* ptr = (char*) &value;
return ptr[0] == mask;
}
template <typename T>
char getCustomNaNMask( T value )
{
char* ptr = (char*) &value;
return ptr[0];
}
template <typename Iterator = std::ostreambuf_iterator<char> >
class NumPut : public std::num_put<char, Iterator>
{
private:
using base_type = std::num_put<char, Iterator>;
public:
using char_type = typename base_type::char_type;
using iter_type = typename base_type::iter_type;
NumPut(std::size_t refs = 0)
: base_type(refs)
{}
protected:
virtual iter_type do_put(iter_type out, std::ios_base& str, char_type fill, double v) const override {
if(std::isnan(v))
{
char mask = getCustomNaNMask(v);
if ( mask == 0x00 )
{
out = std::copy(std::begin(NotANumber), std::end(NotANumber), out);
}
else
{
std::stringstream maskStr;
maskStr << "(0x" << std::hex << (unsigned) mask << ")";
std::string temp = maskStr.str();
out = std::copy(std::begin(CustomNotANumber), std::end(CustomNotANumber), out);
out = std::copy(std::begin(temp), std::end(temp), out);
}
}
else
{
out = base_type::do_put(out, str, fill, v);
}
return out;
}
private:
static const std::string NotANumber;
static const std::string CustomNotANumber;
};
template<typename Iterator> const std::string NumPut<Iterator>::NotANumber = "Not a Number";
template<typename Iterator> const std::string NumPut<Iterator>::CustomNotANumber = "Custom Not a Number";
inline void fixNaNToStream( std::ostream& str )
{
str.imbue( std::locale(str.getloc(), new NumPut<std::ostreambuf_iterator<char>>() ) );
}
一个简单的测试函数:
template<typename T>
void doTest()
{
T regular_nan = std::numeric_limits<T>::quiet_NaN();
T myNaN1 = customizeNaN( regular_nan, 0x01 );
T myNaN2 = customizeNaN( regular_nan, 0x02 );
showValue( regular_nan, "regular" );
showValue( myNaN1, "custom 1" );
showValue( myNaN2, "custom 2" );
}
我的主程序:
int main(int argc, char *argv[])
{
fixNaNToStream( std::cout );
doTest<double>();
doTest<float>();
return 0;
}
doTest<double>
输出:
Not a Number (regular): 0111111111111000000000000000000000000000000000000000000000000000
Custom Not a Number(0x1) (custom 1): 0111111111111000000000000000000000000000000000000000000000000001
Custom Not a Number(0x2) (custom 2): 0111111111111000000000000000000000000000000000000000000000000010
doTest<float>
输出:
Not a Number (regular): 01111111110000000000000000000000
Not a Number (custom 1): 01111111110000000000000000000001
Not a Number (custom 2): 01111111110000000000000000000010
虽然我希望 float
:
Not a Number (regular): 01111111110000000000000000000000
Custom Not a Number(0x1) (custom 1): 01111111110000000000000000000001
Custom Not a Number(0x2) (custom 2): 01111111110000000000000000000010
问题是 num_put
只有 double
的虚拟 do_put
,float
没有。所以我的 float
被默默地转换为 double
,丢失了我的扩展信息。
我知道有一些替代方法,比如使用第二个 post 中的 FloatFormat
,或者简单地编写一个智能 float2double
函数并在将我的 NaN 值发送到输出流,但他们需要开发人员处理这种情况……他可能会忘记。
有没有办法在 NumPut
class 或任何其他可以在将 float
发送到灌注的 stream
时简单地使事情正常工作的方法很好,因为它适用于 double
?
我的要求是能够简单地为任何输出流(std::cout
、本地 std::stringstream
、...)调用类似 fixNaNToStream
的函数,然后发送 float
和 double
并将它们标识为我的自定义 NaN 并相应地显示。
The problem is that num_put only has a virtual do_put for double, not for float. So my float is silently casted to a double, losing my extended information.
由于float
转double
时携带信息的位位置不同,信息丢失:
// Assuming an IEE-754 floating-point representation of float and double
0 11111111 10000000000000000000010
0 11111111111 1000000000000000000001000000000000000000000000000000
请注意,尾数位是 "shifted" 3 个位置,因为指数还需要 3 个位。
此外,值得注意的是此页面中的内容:https://en.cppreference.com/w/cpp/numeric/math/isnan
Copying a NaN is not required, by IEEE-754, to preserve its bit representation (sign and payload), though most implementation do.
我假设对于转换这些值同样适用,因此,即使忽略 OP 代码中未定义行为的其他原因,NaN 装箱方法是否有效实际上是由实现定义的。
在我以前回答这个问题的尝试中,我使用了一些通过不同偏移量的显式位移来获得结果,但正如 jpo38 也发现的那样,最简单的方法是始终生成一个 float
NaN 然后正确投射。
标准库函数 std::nanf 可用于生成 "customized" float
NaN,但在以下演示代码段中我不会使用它。
#include <cstdint>
#include <limits>
#include <cstring>
#include <cassert>
#include <type_traits>
#include <iostream>
#include <bitset>
#include <array>
#include <climits>
namespace my {
// Waiting for C++20 std::bit_cast
// source: https://en.cppreference.com/w/cpp/numeric/bit_cast
template <class To, class From>
typename std::enable_if<
(sizeof(To) == sizeof(From)) &&
std::is_trivially_copyable<From>::value &&
std::is_trivial<To>::value,
// this implementation requires that To is trivially default constructible
To>::type
// constexpr support needs compiler magic
bit_cast(const From &src) noexcept
{
To dst;
std::memcpy(&dst, &src, sizeof(To));
return dst;
}
template <typename T, std::size_t Size = sizeof(T)>
void print_bits(T x)
{
std::array<unsigned char, Size> buf;
std::memcpy(buf.data(), &x, Size);
for (auto it = buf.crbegin(); it != buf.crend(); ++it)
{
std::bitset<CHAR_BIT> b{*it};
std::cout << b.to_string();
}
std::cout << '\n';
}
// The following assumes that both floats and doubles store the mantissa
// in the lower bits and that while casting a NaN (float->double or double->float)
// the most significant of those aren't changed
template <typename T>
auto boxed_nan(uint8_t data = 0) -> typename std::enable_if<std::numeric_limits<T>::has_quiet_NaN, T>::type
{
return bit_cast<float>(
bit_cast<uint32_t>(std::numeric_limits<float>::quiet_NaN()) |
static_cast<uint32_t>(data)
);
}
template <typename T>
uint8_t unbox_nan(T num)
{
return bit_cast<uint32_t>(static_cast<float>(num));
}
}; // End of namespace 'my'
int main()
{
auto my_nan = my::boxed_nan<float>(42);
my::print_bits(my_nan);
my::print_bits(static_cast<double>(my_nan));
assert(my::unbox_nan(my_nan) == 42);
assert(my::unbox_nan(static_cast<double>(my_nan)) == 42);
auto my_d_nan = my::boxed_nan<double>(17);
my::print_bits(my_d_nan);
my::print_bits(static_cast<float>(my_d_nan));
assert(my::unbox_nan(my_d_nan) == 17);
assert(my::unbox_nan(static_cast<float>(my_d_nan)) == 17);
auto my_ld_nan = my::boxed_nan<long double>(9);
assert(my::unbox_nan(my_ld_nan) == 9);
assert(my::unbox_nan(static_cast<double>(my_ld_nan)) == 9);
}
正如 Bob 指出的那样,如果您希望强制转换以两种方式(从浮点数到双精度以及从双精度到浮点数)工作,则双精度扩展位与偏置指数的相对位置应与浮点数的相对位置相同。
考虑到这一点,一种非常简单的处理方法是使用最右边的位作为浮点数。对于 double,与其尝试手动确定应该使用哪个位,不如简单地停止强制转换操作并让系统识别正确的位置...
则代码变为:
#include <iostream>
#include <assert.h>
#include <limits>
#include <bitset>
#include <cmath>
#include <locale>
#include <ostream>
#include <sstream>
template <typename T>
void showValue( T val, const std::string& what )
{
union uT {
T d;
unsigned long long u;
};
uT ud;
ud.d = val;
std::bitset<sizeof(T) * 8> b(ud.u);
std::cout << val << " (" << what << "): " << b.to_string() << std::endl;
}
char& getCustomNaNMask( float& value )
{
char* ptr = (char*) &value;
return ptr[0];
}
/** temp parameter is mainly used because we can't have two functions with same prototype even if they return different values */
float getCustomizedNaN( char mask, float temp )
{
// let's reuse temp argument as we need a local float variable
temp = std::numeric_limits<float>::quiet_NaN();
getCustomNaNMask(temp) |= mask;
return temp;
}
/** temp parameter is mainly used because we can't have two functions with same prototype even if they return different values */
double getCustomizedNaN( char mask, double temp )
{
float asFloat = getCustomizedNaN( mask, float() );
// Let the system correctly cast from float to double, that's it!
return static_cast<double>( asFloat );
}
template <typename T>
bool isCustomNaN( T value, char mask )
{
return getCustomNaNMask(value) == mask;
}
template <typename Iterator = std::ostreambuf_iterator<char> >
class NumPut : public std::num_put<char, Iterator>
{
private:
using base_type = std::num_put<char, Iterator>;
public:
using char_type = typename base_type::char_type;
using iter_type = typename base_type::iter_type;
NumPut(std::size_t refs = 0)
: base_type(refs)
{}
protected:
virtual iter_type do_put(iter_type out, std::ios_base& str, char_type fill, double v) const override {
if(std::isnan(v))
{
float asFloat = static_cast<float>( v );
char& mask = getCustomNaNMask(asFloat);
if ( mask == 0x00 )
{
out = std::copy(std::begin(NotANumber), std::end(NotANumber), out);
}
else
{
std::stringstream maskStr;
maskStr << "(0x" << std::hex << (unsigned) mask << ")";
std::string temp = maskStr.str();
out = std::copy(std::begin(CustomNotANumber), std::end(CustomNotANumber), out);
out = std::copy(std::begin(temp), std::end(temp), out);
}
}
else
{
out = base_type::do_put(out, str, fill, v);
}
return out;
}
private:
static const std::string NotANumber;
static const std::string CustomNotANumber;
};
template<typename Iterator> const std::string NumPut<Iterator>::NotANumber = "Not a Number";
template<typename Iterator> const std::string NumPut<Iterator>::CustomNotANumber = "Custom Not a Number";
inline void fixNaNToStream( std::ostream& str )
{
str.imbue( std::locale(str.getloc(), new NumPut<std::ostreambuf_iterator<char>>() ) );
}
和测试程序:
template<typename T>
void doTest()
{
T regular_nan = std::numeric_limits<T>::quiet_NaN();
T myNaN1 = getCustomizedNaN( 0x01, T() );
T myNaN2 = getCustomizedNaN( 0x02, T() );
showValue( regular_nan, "regular" );
showValue( myNaN1, "custom 1" );
showValue( myNaN2, "custom 2" );
}
int main(int argc, char *argv[])
{
fixNaNToStream( std::cout );
doTest<double>();
doTest<float>();
return 0;
}
输出:
Not a Number (regular): 0111111111111000000000000000000000000000000000000000000000000000
Custom Not a Number(0x1) (custom 1): 0111111111111000000000000000000000100000000000000000000000000000
Custom Not a Number(0x2) (custom 2): 0111111111111000000000000000000001000000000000000000000000000000
Not a Number (regular): 01111111110000000000000000000000
Custom Not a Number(0x1) (custom 1): 01111111110000000000000000000001
Custom Not a Number(0x2) (custom 2): 01111111110000000000000000000010
谢谢鲍勃!
我正在使用 std::cout
行为并显示此额外信息。
这里是定义函数的代码 NumPut
class:
#include <iostream>
#include <assert.h>
#include <limits>
#include <bitset>
#include <cmath>
#include <locale>
#include <ostream>
#include <sstream>
template <typename T>
void showValue( T val, const std::string& what )
{
union uT {
T d;
unsigned long long u;
};
uT ud;
ud.d = val;
std::bitset<sizeof(T) * 8> b(ud.u);
std::cout << val << " (" << what << "): " << b.to_string() << std::endl;
}
template <typename T>
T customizeNaN( T value, char mask )
{
T res = value;
char* ptr = (char*) &res;
assert( ptr[0] == 0 );
ptr[0] |= mask;
return res;
}
template <typename T>
bool isCustomNaN( T value, char mask )
{
char* ptr = (char*) &value;
return ptr[0] == mask;
}
template <typename T>
char getCustomNaNMask( T value )
{
char* ptr = (char*) &value;
return ptr[0];
}
template <typename Iterator = std::ostreambuf_iterator<char> >
class NumPut : public std::num_put<char, Iterator>
{
private:
using base_type = std::num_put<char, Iterator>;
public:
using char_type = typename base_type::char_type;
using iter_type = typename base_type::iter_type;
NumPut(std::size_t refs = 0)
: base_type(refs)
{}
protected:
virtual iter_type do_put(iter_type out, std::ios_base& str, char_type fill, double v) const override {
if(std::isnan(v))
{
char mask = getCustomNaNMask(v);
if ( mask == 0x00 )
{
out = std::copy(std::begin(NotANumber), std::end(NotANumber), out);
}
else
{
std::stringstream maskStr;
maskStr << "(0x" << std::hex << (unsigned) mask << ")";
std::string temp = maskStr.str();
out = std::copy(std::begin(CustomNotANumber), std::end(CustomNotANumber), out);
out = std::copy(std::begin(temp), std::end(temp), out);
}
}
else
{
out = base_type::do_put(out, str, fill, v);
}
return out;
}
private:
static const std::string NotANumber;
static const std::string CustomNotANumber;
};
template<typename Iterator> const std::string NumPut<Iterator>::NotANumber = "Not a Number";
template<typename Iterator> const std::string NumPut<Iterator>::CustomNotANumber = "Custom Not a Number";
inline void fixNaNToStream( std::ostream& str )
{
str.imbue( std::locale(str.getloc(), new NumPut<std::ostreambuf_iterator<char>>() ) );
}
一个简单的测试函数:
template<typename T>
void doTest()
{
T regular_nan = std::numeric_limits<T>::quiet_NaN();
T myNaN1 = customizeNaN( regular_nan, 0x01 );
T myNaN2 = customizeNaN( regular_nan, 0x02 );
showValue( regular_nan, "regular" );
showValue( myNaN1, "custom 1" );
showValue( myNaN2, "custom 2" );
}
我的主程序:
int main(int argc, char *argv[])
{
fixNaNToStream( std::cout );
doTest<double>();
doTest<float>();
return 0;
}
doTest<double>
输出:
Not a Number (regular): 0111111111111000000000000000000000000000000000000000000000000000
Custom Not a Number(0x1) (custom 1): 0111111111111000000000000000000000000000000000000000000000000001
Custom Not a Number(0x2) (custom 2): 0111111111111000000000000000000000000000000000000000000000000010
doTest<float>
输出:
Not a Number (regular): 01111111110000000000000000000000
Not a Number (custom 1): 01111111110000000000000000000001
Not a Number (custom 2): 01111111110000000000000000000010
虽然我希望 float
:
Not a Number (regular): 01111111110000000000000000000000
Custom Not a Number(0x1) (custom 1): 01111111110000000000000000000001
Custom Not a Number(0x2) (custom 2): 01111111110000000000000000000010
问题是 num_put
只有 double
的虚拟 do_put
,float
没有。所以我的 float
被默默地转换为 double
,丢失了我的扩展信息。
我知道有一些替代方法,比如使用第二个 post 中的 FloatFormat
,或者简单地编写一个智能 float2double
函数并在将我的 NaN 值发送到输出流,但他们需要开发人员处理这种情况……他可能会忘记。
有没有办法在 NumPut
class 或任何其他可以在将 float
发送到灌注的 stream
时简单地使事情正常工作的方法很好,因为它适用于 double
?
我的要求是能够简单地为任何输出流(std::cout
、本地 std::stringstream
、...)调用类似 fixNaNToStream
的函数,然后发送 float
和 double
并将它们标识为我的自定义 NaN 并相应地显示。
The problem is that num_put only has a virtual do_put for double, not for float. So my float is silently casted to a double, losing my extended information.
由于float
转double
时携带信息的位位置不同,信息丢失:
// Assuming an IEE-754 floating-point representation of float and double
0 11111111 10000000000000000000010
0 11111111111 1000000000000000000001000000000000000000000000000000
请注意,尾数位是 "shifted" 3 个位置,因为指数还需要 3 个位。
此外,值得注意的是此页面中的内容:https://en.cppreference.com/w/cpp/numeric/math/isnan
Copying a NaN is not required, by IEEE-754, to preserve its bit representation (sign and payload), though most implementation do.
我假设对于转换这些值同样适用,因此,即使忽略 OP 代码中未定义行为的其他原因,NaN 装箱方法是否有效实际上是由实现定义的。
在我以前回答这个问题的尝试中,我使用了一些通过不同偏移量的显式位移来获得结果,但正如 jpo38 也发现的那样,最简单的方法是始终生成一个 float
NaN 然后正确投射。
标准库函数 std::nanf 可用于生成 "customized" float
NaN,但在以下演示代码段中我不会使用它。
#include <cstdint>
#include <limits>
#include <cstring>
#include <cassert>
#include <type_traits>
#include <iostream>
#include <bitset>
#include <array>
#include <climits>
namespace my {
// Waiting for C++20 std::bit_cast
// source: https://en.cppreference.com/w/cpp/numeric/bit_cast
template <class To, class From>
typename std::enable_if<
(sizeof(To) == sizeof(From)) &&
std::is_trivially_copyable<From>::value &&
std::is_trivial<To>::value,
// this implementation requires that To is trivially default constructible
To>::type
// constexpr support needs compiler magic
bit_cast(const From &src) noexcept
{
To dst;
std::memcpy(&dst, &src, sizeof(To));
return dst;
}
template <typename T, std::size_t Size = sizeof(T)>
void print_bits(T x)
{
std::array<unsigned char, Size> buf;
std::memcpy(buf.data(), &x, Size);
for (auto it = buf.crbegin(); it != buf.crend(); ++it)
{
std::bitset<CHAR_BIT> b{*it};
std::cout << b.to_string();
}
std::cout << '\n';
}
// The following assumes that both floats and doubles store the mantissa
// in the lower bits and that while casting a NaN (float->double or double->float)
// the most significant of those aren't changed
template <typename T>
auto boxed_nan(uint8_t data = 0) -> typename std::enable_if<std::numeric_limits<T>::has_quiet_NaN, T>::type
{
return bit_cast<float>(
bit_cast<uint32_t>(std::numeric_limits<float>::quiet_NaN()) |
static_cast<uint32_t>(data)
);
}
template <typename T>
uint8_t unbox_nan(T num)
{
return bit_cast<uint32_t>(static_cast<float>(num));
}
}; // End of namespace 'my'
int main()
{
auto my_nan = my::boxed_nan<float>(42);
my::print_bits(my_nan);
my::print_bits(static_cast<double>(my_nan));
assert(my::unbox_nan(my_nan) == 42);
assert(my::unbox_nan(static_cast<double>(my_nan)) == 42);
auto my_d_nan = my::boxed_nan<double>(17);
my::print_bits(my_d_nan);
my::print_bits(static_cast<float>(my_d_nan));
assert(my::unbox_nan(my_d_nan) == 17);
assert(my::unbox_nan(static_cast<float>(my_d_nan)) == 17);
auto my_ld_nan = my::boxed_nan<long double>(9);
assert(my::unbox_nan(my_ld_nan) == 9);
assert(my::unbox_nan(static_cast<double>(my_ld_nan)) == 9);
}
正如 Bob 指出的那样,如果您希望强制转换以两种方式(从浮点数到双精度以及从双精度到浮点数)工作,则双精度扩展位与偏置指数的相对位置应与浮点数的相对位置相同。
考虑到这一点,一种非常简单的处理方法是使用最右边的位作为浮点数。对于 double,与其尝试手动确定应该使用哪个位,不如简单地停止强制转换操作并让系统识别正确的位置...
则代码变为:
#include <iostream>
#include <assert.h>
#include <limits>
#include <bitset>
#include <cmath>
#include <locale>
#include <ostream>
#include <sstream>
template <typename T>
void showValue( T val, const std::string& what )
{
union uT {
T d;
unsigned long long u;
};
uT ud;
ud.d = val;
std::bitset<sizeof(T) * 8> b(ud.u);
std::cout << val << " (" << what << "): " << b.to_string() << std::endl;
}
char& getCustomNaNMask( float& value )
{
char* ptr = (char*) &value;
return ptr[0];
}
/** temp parameter is mainly used because we can't have two functions with same prototype even if they return different values */
float getCustomizedNaN( char mask, float temp )
{
// let's reuse temp argument as we need a local float variable
temp = std::numeric_limits<float>::quiet_NaN();
getCustomNaNMask(temp) |= mask;
return temp;
}
/** temp parameter is mainly used because we can't have two functions with same prototype even if they return different values */
double getCustomizedNaN( char mask, double temp )
{
float asFloat = getCustomizedNaN( mask, float() );
// Let the system correctly cast from float to double, that's it!
return static_cast<double>( asFloat );
}
template <typename T>
bool isCustomNaN( T value, char mask )
{
return getCustomNaNMask(value) == mask;
}
template <typename Iterator = std::ostreambuf_iterator<char> >
class NumPut : public std::num_put<char, Iterator>
{
private:
using base_type = std::num_put<char, Iterator>;
public:
using char_type = typename base_type::char_type;
using iter_type = typename base_type::iter_type;
NumPut(std::size_t refs = 0)
: base_type(refs)
{}
protected:
virtual iter_type do_put(iter_type out, std::ios_base& str, char_type fill, double v) const override {
if(std::isnan(v))
{
float asFloat = static_cast<float>( v );
char& mask = getCustomNaNMask(asFloat);
if ( mask == 0x00 )
{
out = std::copy(std::begin(NotANumber), std::end(NotANumber), out);
}
else
{
std::stringstream maskStr;
maskStr << "(0x" << std::hex << (unsigned) mask << ")";
std::string temp = maskStr.str();
out = std::copy(std::begin(CustomNotANumber), std::end(CustomNotANumber), out);
out = std::copy(std::begin(temp), std::end(temp), out);
}
}
else
{
out = base_type::do_put(out, str, fill, v);
}
return out;
}
private:
static const std::string NotANumber;
static const std::string CustomNotANumber;
};
template<typename Iterator> const std::string NumPut<Iterator>::NotANumber = "Not a Number";
template<typename Iterator> const std::string NumPut<Iterator>::CustomNotANumber = "Custom Not a Number";
inline void fixNaNToStream( std::ostream& str )
{
str.imbue( std::locale(str.getloc(), new NumPut<std::ostreambuf_iterator<char>>() ) );
}
和测试程序:
template<typename T>
void doTest()
{
T regular_nan = std::numeric_limits<T>::quiet_NaN();
T myNaN1 = getCustomizedNaN( 0x01, T() );
T myNaN2 = getCustomizedNaN( 0x02, T() );
showValue( regular_nan, "regular" );
showValue( myNaN1, "custom 1" );
showValue( myNaN2, "custom 2" );
}
int main(int argc, char *argv[])
{
fixNaNToStream( std::cout );
doTest<double>();
doTest<float>();
return 0;
}
输出:
Not a Number (regular): 0111111111111000000000000000000000000000000000000000000000000000
Custom Not a Number(0x1) (custom 1): 0111111111111000000000000000000000100000000000000000000000000000
Custom Not a Number(0x2) (custom 2): 0111111111111000000000000000000001000000000000000000000000000000
Not a Number (regular): 01111111110000000000000000000000
Custom Not a Number(0x1) (custom 1): 01111111110000000000000000000001
Custom Not a Number(0x2) (custom 2): 01111111110000000000000000000010
谢谢鲍勃!