通过加载二进制数据而不是文本并转换来提高代码性能

Improving code performance by loading binary data instead of text and converting

您好,我正在处理一段现有的 C++ 代码。我平时使用的是 VB.NET,所以这里看到的大部分内容对我来说都令人困惑,甚至自相矛盾。

现有代码从编码如下的文件加载神经网络权重:

2
model.0.conv.conv.weight 5 3e17c000 3e9be000 3e844000 bc2f8000 3d676000
model.0.conv.bn.weight 7  4006a000 3f664000 3fc98000 3fa6a000 3ff2e000 3f5dc000 3fc94000

第一行给出后续行数。这些行中的每一行都有一个描述,一个数字表示后面有多少个值,然后是十六进制的权重值。在真实文件中有数百行,每行可能有数十万个权重。权重文件大小为 400MB。这些值被转换为浮点数以供在 NN 中使用。

解码此文件需要 3 分钟多的时间。我希望省去从十六进制文本到二进制的转换,直接把这些值以原生浮点数的二进制形式存储,从而提高性能。问题是我无法理解代码在做什么,也无法理解我应该如何以二进制形式存储这些值。解码各行的相关代码如下:

// Reads `count` weight blobs from the text stream `input` and stores each one in weightMap
// under the name found at the start of its line.
while (count--)
    {
        Weights wt{ DataType::kFLOAT, nullptr, 0 };
        uint32_t size;

        // Read the blob's name and its element count (the count is parsed as decimal)
        std::string name;
        input >> name >> std::dec >> size;
        wt.type = DataType::kFLOAT;

        // Load blob
        // NOTE(review): val is a uint32_t*, so sizeof(val) is the size of a pointer, not of one
        // element; sizeof(*val) looks like what was intended. With 8-byte pointers this requests
        // twice the bytes needed, which wastes memory but does not overflow the buffer.
        // NOTE(review): the malloc result is not checked before it is written to.
        uint32_t* val = reinterpret_cast<uint32_t*>(malloc(sizeof(val) * size));
        for (uint32_t x = 0, y = size; x < y; ++x)
        {
            // Each hex token is parsed into a 32-bit unsigned integer. Presumably that integer is
            // the raw bit pattern of a float, since the blob is tagged kFLOAT — confirm.
            input >> std::hex >> val[x];
        }
        wt.values = val;

        wt.count = size;
        weightMap[name] = wt;
    }

Weights 类的说明见 TensorRT 文档。DataType::kFLOAT 是一个 32 位浮点数。

我希望在内部循环中 input >> std::hex >> val[x]; 这一行的下面添加一行代码,这样在这些值从十六进制转换出来的同时,就可以把浮点值写入二进制文件,但我不明白这段代码到底在做什么。看起来它分配了内存来保存这些值,但 sizeof(val) 是 8 个字节,而 uint32_t 是 4 个字节。此外,这些值看起来先存储在 val 中,再通过 wt.values 引用,但 val 保存的是整数而不是浮点数。我真的不明白这里的意图。

我能得到一些关于如何存储和加载二进制值以消除十六进制转换的建议吗?任何意见我都将不胜感激,非常感谢。

这是一个示例程序,可以将上面显示的文本格式转换为二进制格式,然后再转换回来。我用问题中的数据做了测试:先转换为二进制,再成功地转换回了文本。我的感觉是,在实际应用程序使用数据之前,最好先用一个单独的程序来处理数据,这样应用程序中读取数据的代码就只有单一的用途。

最后还有一个如何将二进制文件读入 Weights 类的例子。我不使用 TensorRT,所以我复制了文档中用到的两个类型(DataType 和 Weights),以便示例能够编译。请确保不要把这两个定义添加到您的实际代码中。

如果您有任何问题,请告诉我。希望这有助于加快加载速度。

#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

// Prints the command-line synopsis of the converter to standard error.
void usage()
{
    // One entry per output line; each entry carries its own trailing newline.
    static const char *const help_lines[] = {
        "Usage: convert <operation> <input file> <output file>\n",
        "\tconvert b in.txt out.bin - Convert text to binary\n",
        "\tconvert t in.bin out.txt - Convert binary to text\n",
    };

    for (const char *line : help_lines)
    {
        std::cerr << line;
    }
}

// Converts a text weight file into the binary weight format.
//
// Text input:  a decimal line count, then one line per blob of the form
//              "<name> <decimal value count> <hex word> <hex word> ...".
// Binary output (all integers are 32-bit, written in the host's byte order):
//              line count, then per blob: name length, name bytes (no terminator),
//              value count, and the values as consecutive 32-bit words.
//
// infilename  - path of the text file to read.
// outfilename - path of the binary file to create or overwrite.
// Returns true only if every blob was parsed and all output reached the file; false if a
// file cannot be opened, the text is malformed or truncated, or any write fails.
bool text_to_binary(const char *infilename, const char *outfilename)
{
    std::ifstream in(infilename);
    if (!in)
    {
        std::cerr << "Error: Could not open input file '" << infilename << "'\n";
        return false;
    }

    std::ofstream out(outfilename, std::ios::binary);
    if (!out)
    {
        std::cerr << "Error: Could not open output file '" << outfilename << "'\n";
        return false;
    }

    uint32_t line_count;
    if (!(in >> line_count))
    {
        return false;
    }
    if (!out.write(reinterpret_cast<const char *>(&line_count), sizeof(line_count)))
    {
        return false;
    }

    // Declared outside the loop so their storage is reused from one blob to the next.
    std::string name;
    std::vector<uint32_t> values;

    for (uint32_t l = 0; l < line_count; ++l)
    {
        uint32_t num_values;
        // std::dec is required because the stream is left in hex mode by the previous blob.
        if (!(in >> name >> std::dec >> num_values))
        {
            return false;
        }

        values.resize(num_values);
        for (uint32_t &value : values)
        {
            if (!(in >> std::hex >> value))
            {
                return false;
            }
        }

        const uint32_t name_size = static_cast<uint32_t>(name.size());
        const bool written =
            out.write(reinterpret_cast<const char *>(&name_size), sizeof(name_size)) &&
            out.write(name.data(), name_size) &&
            out.write(reinterpret_cast<const char *>(&num_values), sizeof(num_values)) &&
            out.write(reinterpret_cast<const char *>(values.data()), values.size() * sizeof(uint32_t));
        if (!written)
        {
            return false;
        }
    }

    // The stream buffers its output, so a failed write may only surface when the buffer is
    // pushed to the file. Flush explicitly so that such a failure is reported to the caller
    // instead of being silently dropped when the stream is destroyed.
    out.flush();
    return static_cast<bool>(out);
}

// Converts a binary weight file back into the text weight format.
//
// Binary input (all integers are 32-bit, in the host's byte order): line count, then per
// blob: name length, name bytes, value count, and the values as consecutive 32-bit words.
// Text output: the decimal line count on its own line, then one line per blob of the form
//              "<name> <decimal value count> <hex word> <hex word> ...".
//
// infilename  - path of the binary file to read.
// outfilename - path of the text file to create or overwrite.
// Returns true only if every blob was read and all output reached the file; false if a
// file cannot be opened, the input is truncated, or any write fails.
bool binary_to_text(const char *infilename, const char *outfilename)
{
    std::ifstream in(infilename, std::ios::binary);
    if (!in)
    {
        std::cerr << "Error: Could not open input file '" << infilename << "'\n";
        return false;
    }

    std::ofstream out(outfilename);
    if (!out)
    {
        std::cerr << "Error: Could not open output file '" << outfilename << "'\n";
        return false;
    }

    uint32_t line_count;
    if (!in.read(reinterpret_cast<char *>(&line_count), sizeof(line_count)))
    {
        return false;
    }
    if (!(out << line_count << "\n"))
    {
        return false;
    }

    // Declared outside the loop so their storage is reused from one blob to the next.
    std::string name;
    std::vector<uint32_t> values;

    for (uint32_t l = 0; l < line_count; ++l)
    {
        uint32_t name_size;
        if (!in.read(reinterpret_cast<char *>(&name_size), sizeof(name_size)))
        {
            return false;
        }
        name.assign(name_size, '\0');
        if (!in.read(name.data(), name_size))
        {
            return false;
        }

        uint32_t num_values;
        if (!in.read(reinterpret_cast<char *>(&num_values), sizeof(num_values)))
        {
            return false;
        }

        // The text format prints each value as the hex form of its 32-bit pattern, so the
        // words are read as uint32_t directly; no float round trip is needed.
        values.resize(num_values);
        if (!in.read(reinterpret_cast<char *>(values.data()), values.size() * sizeof(uint32_t)))
        {
            return false;
        }

        // std::dec is required because the stream is left in hex mode by the previous blob.
        if (!(out << name << " " << std::dec << num_values))
        {
            return false;
        }
        for (const uint32_t word : values)
        {
            if (!(out << " " << std::hex << word))
            {
                return false;
            }
        }
        if (!(out << "\n"))
        {
            return false;
        }
    }

    // The stream buffers its output, so a failed write may only surface when the buffer is
    // pushed to the file. Flush explicitly so that such a failure is reported to the caller
    // instead of being silently dropped when the stream is destroyed.
    out.flush();
    return static_cast<bool>(out);
}

// Entry point: convert <operation> <input file> <output file>.
// The first character of <operation> selects the direction: 'b'/'B' converts text to binary,
// 't'/'T' converts binary to text. Anything else, or a wrong argument count, prints the usage
// text. Exits with EXIT_SUCCESS only when the selected conversion reports success.
int main(int argc, const char *argv[])
{
    if (argc != 4)
    {
        usage();
        return EXIT_FAILURE;
    }

    const char mode = argv[1][0];
    const char *source_path = argv[2];
    const char *target_path = argv[3];

    bool converted = false;
    if (mode == 'b' || mode == 'B')
    {
        converted = text_to_binary(source_path, target_path);
    }
    else if (mode == 't' || mode == 'T')
    {
        converted = binary_to_text(source_path, target_path);
    }
    else
    {
        usage();
    }

    if (converted)
    {
        return EXIT_SUCCESS;
    }
    return EXIT_FAILURE;
}

// Possible implementation of the code snippet in the original question to read the weights

// START Copied from TensorRT documentation - Do not include in your code
// Local stand-in for the TensorRT DataType enumeration so this example compiles without the
// TensorRT headers. It tags the element type of the buffer referenced by a Weights object.
// The numeric values are written out explicitly because they mirror the copied declaration.
enum class DataType : int32_t
{
    kFLOAT = 0, // 32-bit floating point; the only type the weight loader in this file produces
    kHALF = 1,
    kINT8 = 2,
    kINT32 = 3,
    kBOOL = 4
};

// Local stand-in for the TensorRT Weights type so this example compiles without the TensorRT
// headers. It is a plain aggregate with no constructor or destructor, so it does not manage
// the lifetime of the buffer it points to.
struct Weights
{
    DataType type;      // element type of the buffer behind `values`
    const void *values; // start of the value buffer
    int64_t count;      // number of values in the buffer
};
// END Copied from TensorRT documentation - Do not include in your code

bool read_weights(const char *infilename)
{
    std::unordered_map<std::string, Weights> weightMap;

    std::ifstream in(infilename, std::ios::binary);
    if (!in)
    {
        std::cerr << "Error: Could not open input file '" << infilename << "'\n";
        return false;
    }

    uint32_t line_count;
    if (!in.read(reinterpret_cast<char *>(&line_count), sizeof(line_count)))
    {
        return false;
    }

    for (uint32_t l = 0; l < line_count; ++l)
    {
        uint32_t name_size;
        if (!in.read(reinterpret_cast<char *>(&name_size), sizeof(name_size)))
        {
            return false;
        }
        std::string name(name_size, 0);
        if (!in.read(name.data(), name_size))
        {
            return false;
        }

        uint32_t num_values;
        if (!in.read(reinterpret_cast<char *>(&num_values), sizeof(num_values)))
        {
            return false;
        }

        // Normally I would use float* values = new float[num_values]; here which
        // requires delete [] ptr; to free the memory later.
        // I used malloc to match the original example since I don't know who is
        // responsible to clean things up later, and TensorRT might use free(ptr)
        // Makes no real difference as long as new/delete ro malloc/free are matched up.
        float *values = reinterpret_cast<float *>(malloc(num_values * sizeof(*values)));
        if (!in.read(reinterpret_cast<char *>(values), num_values * sizeof(*values)))
        {
            return false;
        }
        weightMap[name] = Weights { DataType::kFLOAT, values, num_values };
    }
    return true;
}