std::num_put 由于从 float 自动转换为 double 而导致的 nan-boxing 问题

std::num_put issue with nan-boxing due to auto-cast from float to double

我正在使用 NaN 装箱(nan-boxing)为 NaN 值附加一些额外信息,并通过自定义 std::num_put facet 来修改 std::cout 的行为,以显示这些额外信息。

下面是定义这些函数以及 NumPut 类的代码:

#include <iostream>
#include <assert.h>
#include <limits>
#include <bitset>
#include <cmath>
#include <locale>
#include <ostream>
#include <sstream>

/**
 * Prints a value followed by a label and the bit pattern of its object
 * representation, most significant byte first.
 *
 * The bytes are read through an unsigned char pointer instead of a union,
 * so no inactive union member is read and no byte outside the object is
 * touched. Every byte of T is shown, whatever sizeof(T) is.
 *
 * Like the rest of this code, it assumes 8-bit bytes.
 *
 * @param val  value to print (streamed to std::cout with operator<<)
 * @param what label printed between parentheses after the value
 */
template <typename T>
void showValue( T val, const std::string& what )
{
    // unsigned char may alias any object type.
    const unsigned char* bytes = reinterpret_cast<const unsigned char*>( &val );

    // Detect byte order at run time so the most significant byte comes first.
    const unsigned probe = 1;
    const bool littleEndian = ( *reinterpret_cast<const unsigned char*>( &probe ) == 1 );

    std::string bits;
    bits.reserve( sizeof(T) * 8 );
    for ( std::size_t i = 0; i < sizeof(T); ++i )
    {
        const std::size_t index = littleEndian ? sizeof(T) - 1 - i : i;
        bits += std::bitset<8>( bytes[index] ).to_string();
    }

    std::cout << val << " (" << what << "): " << bits << std::endl;
}

/**
 * Returns a copy of value whose first byte in memory has the bits of mask
 * OR-ed into it.
 *
 * The first byte of value must be zero; this is checked with assert.
 *
 * @param value value to tag (a NaN in the callers visible here)
 * @param mask  bits to set in the first byte
 * @return the tagged copy
 */
template <typename T>
T customizeNaN( T value, char mask )
{
    T tagged = value;
    char& firstByte = *reinterpret_cast<char*>( &tagged );
    assert( firstByte == 0 );
    firstByte |= mask;
    return tagged;
}

/**
 * Tells whether value is a NaN whose first byte in memory equals mask.
 *
 * Values that are not NaN are rejected up front: comparing only the first
 * byte would otherwise report ordinary numbers as custom NaNs.
 *
 * @param value value to inspect
 * @param mask  expected content of the first byte
 * @return true only for a NaN carrying exactly this mask
 */
template <typename T>
bool isCustomNaN( T value, char mask )
{
    if ( !std::isnan( value ) )
    {
        return false;
    }
    return *reinterpret_cast<const char*>( &value ) == mask;
}

/**
 * Returns the first byte of the object representation of value.
 *
 * @param value value to inspect (received by copy)
 * @return the byte stored at the lowest address of value
 */
template <typename T>
char getCustomNaNMask( T value )
{
    return *reinterpret_cast<const char*>( &value );
}

/**
 * Number-formatting facet that replaces the textual form of NaN doubles.
 *
 * A NaN whose first byte in memory is zero is written as "Not a Number".
 * Any other NaN is written as "Custom Not a Number(0x<byte>)", where <byte>
 * is that first byte in hexadecimal. Every other value is formatted by
 * std::num_put.
 *
 * Only the double overload is overridden, so a float reaches do_put already
 * converted to double.
 */
template <typename Iterator = std::ostreambuf_iterator<char> >
class NumPut : public std::num_put<char, Iterator>
{
private:
    using base_type = std::num_put<char, Iterator>;

public:
    using char_type = typename base_type::char_type;
    using iter_type = typename base_type::iter_type;

    /** @param refs forwarded unchanged to the std::num_put constructor. */
    NumPut(std::size_t refs = 0)
    :   base_type(refs)
    {}

protected:
    /**
     * Writes v to out: custom text for NaN, standard formatting otherwise.
     *
     * The NaN text is written without padding; fill is only used on the
     * non-NaN path.
     *
     * @return the iterator one past the last character written
     */
    iter_type do_put(iter_type out, std::ios_base& str, char_type fill, double v) const override {
        if ( !std::isnan(v) )
        {
            return base_type::do_put(out, str, fill, v);
        }

        // The standard do_put resets the field width once it has formatted
        // a value. Do the same here so a pending std::setw does not leak
        // into the next insertion.
        str.width(0);

        const unsigned mask = payloadByte(v);
        if ( mask == 0 )
        {
            return std::copy(NotANumber.begin(), NotANumber.end(), out);
        }

        std::ostringstream maskStr;
        maskStr << "(0x" << std::hex << mask << ")";
        const std::string suffix = maskStr.str();
        out = std::copy(CustomNotANumber.begin(), CustomNotANumber.end(), out);
        return std::copy(suffix.begin(), suffix.end(), out);
    }

private:
    /**
     * Returns the first byte in memory of v as a value in [0, 255].
     * Reading it as unsigned char avoids sign extension for bytes >= 0x80.
     */
    static unsigned payloadByte(double v)
    {
        return *reinterpret_cast<const unsigned char*>( &v );
    }

    static const std::string NotANumber;
    static const std::string CustomNotANumber;
};

template<typename Iterator> const std::string NumPut<Iterator>::NotANumber = "Not a Number";
template<typename Iterator> const std::string NumPut<Iterator>::CustomNotANumber = "Custom Not a Number";

/**
 * Installs the NumPut facet on a stream.
 *
 * A new locale is built from the stream's current one plus a NumPut facet,
 * and then imbued on the stream.
 *
 * @param str stream whose numeric output should use NumPut
 */
inline void fixNaNToStream( std::ostream& str )
{
    // The facet is handed to the locale; it is not deleted here.
    const std::locale withNaNFacet( str.getloc(), new NumPut<std::ostreambuf_iterator<char>>() );
    str.imbue( withNaNFacet );
}

一个简单的测试函数:

/**
 * Builds a regular quiet NaN of type T and two customized ones (masks 0x01
 * and 0x02), then prints all three with showValue.
 */
template<typename T>
void doTest()
{
    // Build every value first, then print, in the same order as before.
    const T plainNaN  = std::numeric_limits<T>::quiet_NaN();
    const T taggedOne = customizeNaN( plainNaN, 0x01 );
    const T taggedTwo = customizeNaN( plainNaN, 0x02 );

    showValue( plainNaN,  "regular" );
    showValue( taggedOne, "custom 1" );
    showValue( taggedTwo, "custom 2" );
}

我的主程序:

/** Installs the NaN facet on std::cout, then runs the test for double and float. */
int main(int argc, char *argv[])
{
    // The command-line arguments are not used.
    static_cast<void>( argc );
    static_cast<void>( argv );

    fixNaNToStream( std::cout );

    doTest<double>();
    doTest<float>();
}

doTest<double> 输出:

Not a Number (regular): 0111111111111000000000000000000000000000000000000000000000000000
Custom Not a Number(0x1) (custom 1): 0111111111111000000000000000000000000000000000000000000000000001
Custom Not a Number(0x2) (custom 2): 0111111111111000000000000000000000000000000000000000000000000010

doTest<float> 输出:

Not a Number (regular): 01111111110000000000000000000000
Not a Number (custom 1): 01111111110000000000000000000001
Not a Number (custom 2): 01111111110000000000000000000010

而我希望 float 的输出是:

Not a Number (regular): 01111111110000000000000000000000
Custom Not a Number(0x1) (custom 1): 01111111110000000000000000000001
Custom Not a Number(0x2) (custom 2): 01111111110000000000000000000010

问题是 num_put 只为 double 提供了虚函数 do_put,而没有为 float 提供。所以我的 float 会被隐式转换为 double,从而丢失了我的扩展信息。

我知道有一些替代方法,比如使用第二个 post 中的 FloatFormat,或者简单地编写一个智能的 float2double 函数,并在将我的 NaN 值发送到输出流之前调用它,但它们都需要开发人员自己处理这种情况……而他可能会忘记。

有没有办法在 NumPut 类中(或通过其他任何方式)实现:当把 float 发送到已经 imbue 过的流时,也能像 double 一样正常工作?

我的要求是:能够简单地为任何输出流(std::cout、本地的 std::stringstream 等)调用类似 fixNaNToStream 的函数,然后向其发送 float 或 double,使它们被识别为我的自定义 NaN 并相应地显示。

The problem is that num_put only has a virtual do_put for double, not for float. So my float is silently converted to a double, losing my extended information.

由于从 float 转换为 double 时,携带信息的位所处的位置不同,信息丢失了:

// Assuming an IEEE-754 floating-point representation of float and double
0 11111111 10000000000000000000010
0 11111111111 1000000000000000000001000000000000000000000000000000

请注意,尾数位被“移动”(shifted)了 3 个位置,因为 double 的指数比 float 的指数多需要 3 个位。

此外,值得注意的是此页面中的内容:https://en.cppreference.com/w/cpp/numeric/math/isnan

Copying a NaN is not required, by IEEE-754, to preserve its bit representation (sign and payload), though most implementations do.

我假设对于转换这些值同样适用,因此,即使忽略 OP 代码中未定义行为的其他原因,NaN 装箱方法是否有效实际上是由实现定义的。

在我之前回答这个问题的尝试中,我使用了不同偏移量的显式位移来得到结果,但正如 jpo38 也发现的那样,最简单的方法是始终先生成一个 float NaN,然后再正确地进行类型转换。

标准库函数 std::nanf 可用于生成 "customized" float NaN,但在以下演示代码段中我不会使用它。

#include <cstdint>
#include <limits>
#include <cstring>
#include <cassert>
#include <type_traits>
#include <iostream>
#include <bitset>
#include <array>
#include <climits>

namespace my {

// Stand-in for C++20 std::bit_cast.
// source: https://en.cppreference.com/w/cpp/numeric/bit_cast
//
// Copies the object representation of src into a new To. It only takes part
// in overload resolution when both types have the same size, From is
// trivially copyable and To is trivial.
template <class To, class From>
typename std::enable_if<
    (sizeof(To) == sizeof(From)) &&
    std::is_trivially_copyable<From>::value &&
    std::is_trivial<To>::value,
    // this implementation requires that To is trivially default constructible
    To>::type
// constexpr support needs compiler magic
bit_cast(const From &src) noexcept
{
    To result;
    std::memcpy(&result, &src, sizeof(To));
    return result;
}

// Prints the bytes of x to std::cout as binary digits, starting with the
// byte at the highest address, followed by a newline.
template <typename T, std::size_t Size = sizeof(T)>
void print_bits(T x)
{
    std::array<unsigned char, Size> bytes;
    std::memcpy(bytes.data(), &x, Size);
    for (std::size_t i = Size; i-- > 0; )
    {
        std::cout << std::bitset<CHAR_BIT>(bytes[i]).to_string();
    }
    std::cout << '\n';
}

// The following assumes that both floats and doubles store the mantissa
// in the lower bits and that while casting a NaN (float->double or double->float)
// the most significant of those aren't changed
//
// Builds a float quiet NaN with data OR-ed into its bit pattern and returns
// it converted to T.
template <typename T>
auto boxed_nan(uint8_t data = 0) -> typename std::enable_if<std::numeric_limits<T>::has_quiet_NaN, T>::type
{
    const uint32_t quiet_bits = bit_cast<uint32_t>(std::numeric_limits<float>::quiet_NaN());
    const float tagged = bit_cast<float>(quiet_bits | static_cast<uint32_t>(data));
    return tagged; // implicit float -> T conversion
}

// Converts num to float and returns the low 8 bits of that float's pattern.
template <typename T>
uint8_t unbox_nan(T num)
{
    const float as_float = static_cast<float>(num);
    const uint32_t bits = bit_cast<uint32_t>(as_float);
    return static_cast<uint8_t>(bits); // keep only the low byte
}

} // End of namespace 'my'


// Boxes a payload into a float, a double and a long double NaN, prints the
// float and double patterns, and asserts the payload survives conversion.
int main()
{
    // float NaN, also checked after widening to double
    const float tagged_f = my::boxed_nan<float>(42);
    my::print_bits(tagged_f);
    my::print_bits(static_cast<double>(tagged_f));
    assert(my::unbox_nan(tagged_f) == 42);
    assert(my::unbox_nan(static_cast<double>(tagged_f)) == 42);

    // double NaN, also checked after narrowing to float
    const double tagged_d = my::boxed_nan<double>(17);
    my::print_bits(tagged_d);
    my::print_bits(static_cast<float>(tagged_d));
    assert(my::unbox_nan(tagged_d) == 17);
    assert(my::unbox_nan(static_cast<float>(tagged_d)) == 17);

    // long double NaN, also checked after narrowing to double
    const long double tagged_ld = my::boxed_nan<long double>(9);
    assert(my::unbox_nan(tagged_ld) == 9);
    assert(my::unbox_nan(static_cast<double>(tagged_ld)) == 9);
}

正如 Bob 指出的那样,如果您希望强制转换以两种方式(从浮点数到双精度以及从双精度到浮点数)工作,则双精度扩展位与偏置指数的相对位置应与浮点数的相对位置相同。

考虑到这一点,一种非常简单的处理方法是:对于 float,使用最右边(最低)的位。对于 double,与其尝试手动确定应该使用哪些位,不如直接做一次类型转换,让系统自己确定正确的位置……

则代码变为:

#include <iostream>
#include <assert.h>
#include <limits>
#include <bitset>
#include <cmath>
#include <locale>
#include <ostream>
#include <sstream>

/**
 * Prints a value followed by a label and the bit pattern of its object
 * representation, most significant byte first.
 *
 * The bytes are read through an unsigned char pointer instead of a union,
 * so no inactive union member is read and no byte outside the object is
 * touched. Every byte of T is shown, whatever sizeof(T) is.
 *
 * Like the rest of this code, it assumes 8-bit bytes.
 *
 * @param val  value to print (streamed to std::cout with operator<<)
 * @param what label printed between parentheses after the value
 */
template <typename T>
void showValue( T val, const std::string& what )
{
    // unsigned char may alias any object type.
    const unsigned char* bytes = reinterpret_cast<const unsigned char*>( &val );

    // Detect byte order at run time so the most significant byte comes first.
    const unsigned probe = 1;
    const bool littleEndian = ( *reinterpret_cast<const unsigned char*>( &probe ) == 1 );

    std::string bits;
    bits.reserve( sizeof(T) * 8 );
    for ( std::size_t i = 0; i < sizeof(T); ++i )
    {
        const std::size_t index = littleEndian ? sizeof(T) - 1 - i : i;
        bits += std::bitset<8>( bytes[index] ).to_string();
    }

    std::cout << val << " (" << what << "): " << bits << std::endl;
}

/**
 * Gives read/write access to the first byte in memory of a float.
 *
 * @param value float whose first byte is exposed
 * @return reference to that byte; it refers into value itself
 */
char& getCustomNaNMask( float& value )
{
    return *reinterpret_cast<char*>( &value );
}

/**
 * Builds a float quiet NaN with mask OR-ed into its first byte.
 *
 * @param mask bits to set in the first byte of the NaN
 * @param temp only selects this overload (functions cannot be overloaded on
 *             return type alone); its value is ignored
 * @return the customized float NaN
 */
float getCustomizedNaN( char mask, float temp )
{
    static_cast<void>( temp );

    float tagged = std::numeric_limits<float>::quiet_NaN();
    getCustomNaNMask( tagged ) |= mask;
    return tagged;
}

/**
 * Builds a double NaN by building the customized float NaN and converting it.
 *
 * @param mask bits to set in the first byte of the float NaN
 * @param temp only selects this overload (functions cannot be overloaded on
 *             return type alone); its value is ignored
 * @return the customized float NaN converted to double
 */
double getCustomizedNaN( char mask, double temp )
{
    static_cast<void>( temp );

    // The float -> double conversion decides where the payload bits end up.
    return static_cast<double>( getCustomizedNaN( mask, 0.0f ) );
}

/**
 * Tells whether value is a NaN carrying exactly the given custom mask.
 *
 * The value is converted to float before its first byte is read, as
 * NumPut::do_put does, so the function also compiles and works for double.
 * Values that are not NaN are rejected up front.
 *
 * NOTE(review): this relies on the double -> float conversion keeping the
 * NaN payload bits, which is not guaranteed by IEEE-754 — confirm on the
 * target platform.
 *
 * @param value value to inspect
 * @param mask  expected content of the first byte of the float form
 * @return true only for a NaN carrying exactly this mask
 */
template <typename T>
bool isCustomNaN( T value, char mask )
{
    if ( !std::isnan( value ) )
    {
        return false;
    }
    const float asFloat = static_cast<float>( value );
    return *reinterpret_cast<const char*>( &asFloat ) == mask;
}

/**
 * Number-formatting facet that replaces the textual form of NaN values.
 *
 * The NaN is converted to float and the first byte in memory of that float
 * is used as the mask. A zero mask is written as "Not a Number"; any other
 * mask is written as "Custom Not a Number(0x<mask>)" in hexadecimal. Every
 * other value is formatted by std::num_put.
 *
 * Only the double overload is overridden, so a float reaches do_put already
 * converted to double.
 *
 * NOTE(review): this relies on float <-> double conversions keeping the NaN
 * payload bits, which is not guaranteed by IEEE-754 — confirm on the target
 * platform.
 */
template <typename Iterator = std::ostreambuf_iterator<char> >
class NumPut : public std::num_put<char, Iterator>
{
private:
    using base_type = std::num_put<char, Iterator>;

public:
    using char_type = typename base_type::char_type;
    using iter_type = typename base_type::iter_type;

    /** @param refs forwarded unchanged to the std::num_put constructor. */
    NumPut(std::size_t refs = 0)
    :   base_type(refs)
    {}

protected:
    /**
     * Writes v to out: custom text for NaN, standard formatting otherwise.
     *
     * The NaN text is written without padding; fill is only used on the
     * non-NaN path.
     *
     * @return the iterator one past the last character written
     */
    iter_type do_put(iter_type out, std::ios_base& str, char_type fill, double v) const override {
        if ( !std::isnan(v) )
        {
            return base_type::do_put(out, str, fill, v);
        }

        // The standard do_put resets the field width once it has formatted
        // a value. Do the same here so a pending std::setw does not leak
        // into the next insertion.
        str.width(0);

        const unsigned mask = payloadByte(v);
        if ( mask == 0 )
        {
            return std::copy(NotANumber.begin(), NotANumber.end(), out);
        }

        std::ostringstream maskStr;
        maskStr << "(0x" << std::hex << mask << ")";
        const std::string suffix = maskStr.str();
        out = std::copy(CustomNotANumber.begin(), CustomNotANumber.end(), out);
        return std::copy(suffix.begin(), suffix.end(), out);
    }

private:
    /**
     * Converts v to float and returns the first byte in memory of that
     * float as a value in [0, 255]. Reading it as unsigned char avoids
     * sign extension for bytes >= 0x80.
     */
    static unsigned payloadByte(double v)
    {
        const float asFloat = static_cast<float>( v );
        return *reinterpret_cast<const unsigned char*>( &asFloat );
    }

    static const std::string NotANumber;
    static const std::string CustomNotANumber;
};

template<typename Iterator> const std::string NumPut<Iterator>::NotANumber = "Not a Number";
template<typename Iterator> const std::string NumPut<Iterator>::CustomNotANumber = "Custom Not a Number";

/**
 * Installs the NumPut facet on a stream.
 *
 * A new locale is built from the stream's current one plus a NumPut facet,
 * and then imbued on the stream.
 *
 * @param str stream whose numeric output should use NumPut
 */
inline void fixNaNToStream( std::ostream& str )
{
    // The facet is handed to the locale; it is not deleted here.
    const std::locale withNaNFacet( str.getloc(), new NumPut<std::ostreambuf_iterator<char>>() );
    str.imbue( withNaNFacet );
}

和测试程序:

/**
 * Builds a regular quiet NaN of type T and two customized ones (masks 0x01
 * and 0x02), then prints all three with showValue.
 */
template<typename T>
void doTest()
{
    // Build every value first, then print, in the same order as before.
    const T plainNaN  = std::numeric_limits<T>::quiet_NaN();
    const T taggedOne = getCustomizedNaN( 0x01, T() );
    const T taggedTwo = getCustomizedNaN( 0x02, T() );

    showValue( plainNaN,  "regular" );
    showValue( taggedOne, "custom 1" );
    showValue( taggedTwo, "custom 2" );
}

/** Installs the NaN facet on std::cout, then runs the test for double and float. */
int main(int argc, char *argv[])
{
    // The command-line arguments are not used.
    static_cast<void>( argc );
    static_cast<void>( argv );

    fixNaNToStream( std::cout );

    doTest<double>();
    doTest<float>();
}

输出:

Not a Number (regular): 0111111111111000000000000000000000000000000000000000000000000000
Custom Not a Number(0x1) (custom 1): 0111111111111000000000000000000000100000000000000000000000000000
Custom Not a Number(0x2) (custom 2): 0111111111111000000000000000000001000000000000000000000000000000
Not a Number (regular): 01111111110000000000000000000000
Custom Not a Number(0x1) (custom 1): 01111111110000000000000000000001
Custom Not a Number(0x2) (custom 2): 01111111110000000000000000000010

谢谢鲍勃!