使用 boost::mapped_region 增加文件以进一步写入?

Grow a file for further writing to using boost::mapped_region?

我需要创建并写入内存映射文件。有时需要增大文件。

我写了下面这个小测试:先创建一个文件,再用 boost::mapped_region 映射它,然后向其中写入数据。

这一切都按预期工作:

#include <fstream>
#include <boost/interprocess/file_mapping.hpp>
#include <boost/interprocess/mapped_region.hpp>

namespace bip = boost::interprocess;

/// Creates (or truncates) the file `fn` and gives it a size of
/// `sizeof(std::uint64_t) * (num + 1)` bytes.
void createFile(const char* fn, std::uint64_t num)
{
    // One extra 64-bit slot is reserved in addition to the `num` requested ones.
    const std::uint64_t totalBytes = (num + 1) * sizeof(std::uint64_t);

    const std::ios_base::openmode createMode =
        std::ios_base::out | std::ios_base::binary | std::ios_base::trunc;

    std::filebuf file;
    file.open(fn, createMode);

    // Writing a single zero byte at the last offset extends the file to totalBytes.
    file.pubseekoff(totalBytes - 1, std::ios_base::beg);
    file.sputc(0);
    file.close();
}

void writeToFile(const char* fn, std::uint64_t pos, std::uint64_t val)
{
    bip::file_mapping  fm(fn, bip::read_write);
    bip::mapped_region rg(fm, bip::read_write);

    std::uint64_t* p = reinterpret_cast<std::uint64_t*>(rg.get_address());

    *p = std::max(*p, pos); // store max num values
    *(p + pos) = val;       // write value into position
}

/// Creates a file with room for three values and fills positions 1..3.
int main ()
{
    const char* const path = "/tmp/test.dat";
    const std::uint64_t valueCount = 3;

    createFile(path, valueCount);

    // Position i receives the digit i repeated sixteen times in hex
    // (0x1111111111111111, 0x2222222222222222, 0x3333333333333333).
    for (std::uint64_t position = 1; position <= valueCount; ++position)
        writeToFile(path, position, position * 0x1111111111111111ULL);

    return 0;
}

运行该程序会按预期生成输出文件;转储其内容时,可以看到各个值都已正确写入:

$ xxd -p /tmp/test.dat 
030000000000000011111111111111112222222222222222333333333333
3333

但是,现在我想调整文件的大小,以便我可以在最后写入额外的数据。

我添加了下面的函数 growFile(按照下面 timrau 的建议使用了 ios_base::app):

/// Opens `fn` in append mode, seeks to the last byte of a
/// `sizeof(std::uint64_t) * (num + 1)`-byte file and writes one zero byte.
///
/// NOTE(review): with std::ios_base::app every write is placed at the current
/// end of the file, so the seek does not decide where the byte lands; the file
/// grows by a single byte rather than to the requested size.
void growFile(const char* fn, std::uint64_t num)
{
    const std::uint64_t wantedBytes = (num + 1) * sizeof(std::uint64_t);

    const std::ios_base::openmode appendMode =
        std::ios_base::out | std::ios_base::binary | std::ios_base::app;

    std::filebuf file;
    file.open(fn, appendMode);

    file.pubseekoff(wantedBytes - 1, std::ios_base::beg);
    file.sputc(0);
    file.close();
}

我现在在增大文件后添加更多值:

/// Creates a file for three values, fills positions 1..3, calls growFile()
/// for six values and then writes positions 4..6.
int main ()
{
    const char* const path = "/tmp/test.dat";

    // Position i receives the digit i repeated sixteen times in hex.
    const std::uint64_t digitPattern = 0x1111111111111111ULL;

    createFile(path, 3);
    for (std::uint64_t position = 1; position <= 3; ++position)
        writeToFile(path, position, position * digitPattern);

    // Positions 4..6 lie beyond the size the file was created with.
    growFile(path, 6);
    for (std::uint64_t position = 4; position <= 6; ++position)
        writeToFile(path, position, position * digitPattern);

    return 0;
}

当我转储文件时,它丢失了大部分新值。

$ xxd -p /tmp/test.dat 
060000000000000011111111111111112222222222222222333333333333
333344

请注意,如果我不去增大文件,而是在最初创建时就分配足够的空间,它会按预期工作:

/// Creates the file with room for six values up front and fills positions 1..6.
int main ()
{
    const char* const path = "/tmp/test.dat";
    const std::uint64_t valueCount = 6;

    createFile(path, valueCount);

    // Position i receives the digit i repeated sixteen times in hex
    // (0x1111111111111111 ... 0x6666666666666666).
    for (std::uint64_t position = 1; position <= valueCount; ++position)
        writeToFile(path, position, position * 0x1111111111111111ULL);

    return 0;
}

当我转储文件时,所有值都在那里:

$ xxd -p /tmp/test.dat 
060000000000000011111111111111112222222222222222333333333333
3333444444444444444455555555555555556666666666666666

我怎样才能在文件创建之后将其增大,以便在超出其初始大小的位置继续写入?

在 resizeFile() 中,您应该以追加模式打开文件。否则,文件会在 open() 时被截断为空文件。
// Open with std::ios_base::app instead of std::ios_base::trunc so that the
// existing contents are not discarded when the file is opened.
fbuf.open(fn, std::ios_base::out | std::ios_base::binary | std::ios_base::app);

我又在 coliru 上试了一下。

您必须以读写(read/write)模式打开文件——相当于 ::fopen(..., "r+")。

这些选项都有效:

/// Extends the existing file `fn` to `sizeof(std::uint64_t) * (num + 1)` bytes
/// while keeping its current contents.
///
/// The file is opened for reading and writing (the equivalent of
/// ::fopen(fn, "r+")), so it is neither truncated nor put into append mode and
/// the seek really determines where the padding byte is written.
///
/// @param fn   path of an existing file; if it cannot be opened nothing happens.
/// @param num  number of 64-bit values the file should have room for, in
///             addition to the leading slot. The default of 5 gives the
///             48-byte size this function used to be hard-coded to; pass 6 to
///             make room for positions 1..6.
///
/// A file that is already at least that large is left untouched, so no
/// existing data byte is overwritten with zero.
///
/// Each of the three variants below works; two are kept commented out.
void resizeFile(const char* fn, std::uint64_t num = 5)
{
    // Offset of the last byte of the resized file.
    const std::uint64_t offset = sizeof(std::uint64_t) * (num + 1) - 1;

    /*
    FILE* fp = ::fopen(fn, "r+");
    ::fseek(fp, offset, SEEK_SET);
    ::fputc(0, fp);
    ::fclose(fp);
    */

/*
    std::fstream f;
    f.exceptions(std::ios::failbit | std::ios::badbit);
    f.open(fn, std::ios_base::in | std::ios_base::out | std::ios_base::binary);
    f.seekp(offset, std::ios_base::beg);
    f.put(0);
    f.flush();
*/

    std::filebuf fbuf;
    if (!fbuf.open(fn, std::ios_base::in | std::ios_base::out | std::ios_base::binary))
        return; // the file does not exist or cannot be opened read/write

    // Only pad when the file is shorter than requested; if the size cannot be
    // determined, fall back to writing the padding byte unconditionally.
    const std::streamoff currentSize = fbuf.pubseekoff(0, std::ios_base::end);
    if (currentSize < 0 || static_cast<std::uint64_t>(currentSize) <= offset)
    {
        fbuf.pubseekoff(static_cast<std::streamoff>(offset), std::ios_base::beg);
        fbuf.sputc(0);
    }
    fbuf.close();
}

http://coliru.stacked-crooked.com/a/ae224032dd036639

看起来,先用 boost::iostreams::mapped_file 截断/创建文件,随后再用 std::ofstream 追加,在 Windows 上可以获得非常快的写入速度。这只是猜测,但似乎 mapped_file::close() 被推迟执行,而 ofstream 能够利用仍处于打开状态的 mapped_file。

  • 左列:创建 = ofstream,追加 = ofstream
  • 中列:创建 = mapped_file,追加 = mapped_file
  • 右列:创建 = mapped_file,追加 = ofstream

一些计时(MSVC 15.9.19;boost 1.72):

Time [ms] = 5.6878 ; 0.774 ; 0.7593
Time [ms] = 6.7207 ; 8.2712 ; 0.3294
Time [ms] = 5.8094 ; 4.7558 ; 0.439
Time [ms] = 3.3206 ; 4.7963 ; 0.324
Time [ms] = 5.2561 ; 3.9712 ; 0.3331
Time [ms] = 3.9206 ; 4.0262 ; 0.3952
Time [ms] = 3.0896 ; 3.9835 ; 0.3359
Time [ms] = 9.9593 ; 4.9418 ; 0.3266
Time [ms] = 3.7967 ; 4.9202 ; 0.3138
Time [ms] = 3.1793 ; 3.8531 ; 0.3195
Time [ms] = 3.0293 ; 3.7158 ; 0.3453
Time [ms] = 2.885 ; 3.6458 ; 0.3262
Time [ms] = 2.9635 ; 3.8436 ; 0.321
Time [ms] = 3.0339 ; 3.8216 ; 0.3427
Time [ms] = 2.8762 ; 3.7251 ; 0.3334
Time [ms] = 2.9138 ; 4.4343 ; 0.3165
Same 1 = 1
Same 2 = 1
Same 3 = 1

使用以下代码生成:

#include <boost/iostreams/device/mapped_file.hpp>
#include <boost/filesystem.hpp>

#include <chrono>
#include <cstring>
#include <fstream>
#include <iostream>
#include <random>
#include <string>
#include <vector>
#include <stdint.h>
#include <stdio.h>

/// Writes the raw bytes of `data` to `filename` through std::ofstream.
///
/// @tparam Type    element type; the elements' bytes are written unchanged.
/// @tparam append  true: open with std::ios::app; false: open with std::ios::trunc.
/// @param filename path of the file to write.
/// @param data     elements to write. May be empty: the file is still opened
///                 (and truncated when `append` is false) but nothing is written.
template <class Type, bool append>
void writer_std(const std::string& filename, const std::vector<Type>& data)
{
    const std::ios::openmode mode =
        std::ios::binary | std::ios::out | (append ? std::ios::app : std::ios::trunc);

    // The stream is flushed and closed by its destructor on return.
    std::ofstream writer(filename, mode);

    // data.data() is valid for an empty vector, whereas &data[0] is not.
    const std::streamsize bytes = static_cast<std::streamsize>(sizeof(Type) * data.size());
    writer.write(reinterpret_cast<const char *>(data.data()), bytes);
}

/// Creates `filename` with exactly the size of `data` via
/// boost::iostreams::mapped_file and copies the elements' bytes into the mapping.
///
/// @tparam Type    element type; the elements' bytes are copied unchanged.
/// @param filename path of the file to create/overwrite.
/// @param data     elements to write.
///
/// NOTE(review): for empty `data`, new_file_size is 0, so the file is not
/// overwritten (see the comment on that line) — confirm whether callers need
/// an empty file in that case.
template <class Type>
void writer_boost_trunc(const std::string& filename, const std::vector<Type>& data)
{
    const size_t bytes = sizeof(Type) * data.size();

    boost::iostreams::mapped_file_params params(filename);
    params.new_file_size = bytes; // overwrites filename if non-zero
    params.flags = boost::iostreams::mapped_file::mapmode::readwrite;

    boost::iostreams::mapped_file mf;
    mf.open(params);

    // data.data() is valid for an empty vector, whereas &data[0] is not;
    // the copy is skipped entirely when there is nothing to write.
    if (bytes != 0)
        std::memcpy(mf.data(), data.data(), bytes);
    mf.close();
}

/// Appends the raw bytes of `data` to the existing file `filename`: the file is
/// first grown with C stdio, then the new tail is mapped with
/// boost::iostreams::mapped_file and filled.
///
/// @tparam Type    element type; the elements' bytes are copied unchanged.
/// @param filename path of an existing file (it is opened with mode "r+").
/// @param data     elements to append. Empty `data` is a no-op; growing by
///                 zero bytes would otherwise write a 0 over the file's last
///                 existing byte.
/// @throws std::ios_base::failure if the file cannot be opened or grown.
template <class Type>
void writer_boost_append(const std::string& filename, const std::vector<Type>& data)
{
    const size_t bytes = sizeof(Type) * data.size();
    if (bytes == 0)
        return;

#pragma warning(push)
#pragma warning(disable: 4996)
    FILE * file = fopen(filename.c_str(), "r+");
#pragma warning(pop)
    if (file == nullptr)
        throw std::ios_base::failure("writer_boost_append: cannot open " + filename);

    // Grow the file by writing one zero byte at its new last offset.
    // NOTE(review): fseek/ftell work with `long`, which limits the reachable
    // offsets on platforms where `long` is 32 bits wide.
    size_t current = 0;
    bool grown = false;
    if (fseek(file, 0, SEEK_END) == 0)
    {
        const long end = ftell(file);
        if (end >= 0)
        {
            current = static_cast<size_t>(end);
            grown = fseek(file, static_cast<long>(current + bytes - 1), SEEK_SET) == 0
                 && fputc(0, file) != EOF;
        }
    }
    // fclose is evaluated first so the handle is released on every path.
    grown = (fclose(file) == 0) && grown;
    if (!grown)
        throw std::ios_base::failure("writer_boost_append: cannot grow " + filename);

    const size_t larger = current + bytes;

    // The mapping offset must be a multiple of the mapping alignment, so map
    // from the aligned offset at or below the old end of file.
    const size_t alignment = boost::iostreams::mapped_file::alignment();
    boost::iostreams::mapped_file_params params(filename);
    params.flags  = boost::iostreams::mapped_file::mapmode::readwrite;
    params.offset = alignment * (current / alignment);
    params.length = larger - params.offset;

    boost::iostreams::mapped_file mf;
    mf.open(params);

    // Skip the part of the mapped window that belongs to the old contents.
    char * buffer = reinterpret_cast<char *>(mf.data()) + (current - params.offset);
    std::memcpy(buffer, data.data(), bytes);
    mf.close();
}

/// Reads `number` elements of `Type`, starting at element index `begin`, from
/// `filename` through a read-only boost::iostreams::mapped_file.
///
/// @tparam Type    element type; bytes are copied from the file unchanged.
/// @param filename path of the file to read.
/// @param begin    index of the first element (in units of sizeof(Type)).
/// @param number   number of elements to read; 0 returns an empty vector
///                 without opening the file.
/// @return the elements read, in file order.
template <class Type>
std::vector<Type> read(const std::string& filename, const size_t begin, const size_t number)
{
    std::vector<Type> result(number);
    if (number == 0)
        return result; // avoids mapping a zero-length window and &result[0] on an empty vector

    const size_t begin_byte =  begin * sizeof(Type);
    const size_t  size_byte = number * sizeof(Type);
    const size_t   end_byte = begin_byte + size_byte;

    // The mapping offset must be a multiple of the mapping alignment, so map
    // from the aligned offset at or below the first requested byte.
    const size_t alignment = boost::iostreams::mapped_file::alignment();
    boost::iostreams::mapped_file_params params(filename);
    params.flags  = boost::iostreams::mapped_file::mapmode::readonly;
    params.offset = alignment * (begin_byte / alignment);
    params.length = end_byte - params.offset;

    boost::iostreams::mapped_file mf;
    mf.open(params);

    // Skip the bytes between the aligned offset and the first requested byte.
    const char * buffer = reinterpret_cast<const char *>(mf.const_data()) + (begin_byte - params.offset);
    std::memcpy(result.data(), buffer, size_byte);
    mf.close();
    return result;
}

int main()
{
    std::random_device rd;
    std::mt19937_64 gen(rd());
    std::uniform_int_distribution<uint64_t> dis(0, UINT64_MAX);

    constexpr size_t num_batches = 16;
    constexpr size_t batch_size  = 65536;

    std::vector<std::vector<uint64_t>> data(num_batches);
    for (size_t batch = 0; batch < num_batches; ++batch)
    {
        data[batch].reserve(batch_size);
        for (size_t item = 0; item < batch_size; ++item)
            data[batch].push_back(dis(gen));
    }

    const std::string f1 = "dump1.bin";
    const std::string f2 = "dump2.bin";
    const std::string f3 = "dump3.bin";

    double test1[num_batches];
    for (size_t batch = 0; batch < num_batches; ++batch)
    {
        auto t1 = std::chrono::system_clock::now();
        if (batch == 0)
            writer_std<uint64_t, false>(f1, data[batch]);
        else
            writer_std<uint64_t, true>(f1, data[batch]);
        auto t2 = std::chrono::system_clock::now();
        test1[batch] = 1e-6 * std::chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1).count();
    }

    double test2[num_batches];
    for (size_t batch = 0; batch < num_batches; ++batch)
    {
        auto t1 = std::chrono::system_clock::now();
        if (batch == 0)
            writer_boost_trunc<uint64_t>(f2, data[batch]);
        else
            writer_boost_append<uint64_t>(f2, data[batch]);
        auto t2 = std::chrono::system_clock::now();
        test2[batch] = 1e-6 * std::chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1).count();
    }

    double test3[num_batches];
    for (size_t batch = 0; batch < num_batches; ++batch)
    {
        auto t1 = std::chrono::system_clock::now();
        if (batch == 0)
            writer_boost_trunc<uint64_t>(f3, data[batch]);
        else
            writer_std<uint64_t, true>(f3, data[batch]);
        auto t2 = std::chrono::system_clock::now();
        test3[batch] = 1e-6 * std::chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1).count();
    }

    for (size_t batch = 0; batch < num_batches; ++batch)
        std::cout << "Time [ms] = " << test1[batch] << " ; " << test2[batch] << " ; " << test3[batch] << std::endl;

    bool same1 = true;
    for (size_t batch = 0; batch < num_batches; ++batch)
    {
        std::vector<uint64_t> part = read<uint64_t>(f1, batch * batch_size, batch_size);
        for (size_t item = 0; item < batch_size; ++item)
            same1 = same1 && part[item] == data[batch][item];
    }

    std::cout << "Same 1 = " << same1 << std::endl;

    bool same2 = true;
    for (size_t batch = 0; batch < num_batches; ++batch)
    {
        std::vector<uint64_t> part = read<uint64_t>(f2, batch * batch_size, batch_size);
        for (size_t item = 0; item < batch_size; ++item)
            same2 = same2 && part[item] == data[batch][item];
    }

    std::cout << "Same 2 = " << same2 << std::endl;

    bool same3 = true;
    for (size_t batch = 0; batch < num_batches; ++batch)
    {
        std::vector<uint64_t> part = read<uint64_t>(f3, batch * batch_size, batch_size);
        for (size_t item = 0; item < batch_size; ++item)
            same3 = same3 && part[item] == data[batch][item];
    }

    std::cout << "Same 3 = " << same3 << std::endl;

    boost::filesystem::path p1(f1); boost::filesystem::remove(p1);
    boost::filesystem::path p2(f2); boost::filesystem::remove(p2);
    boost::filesystem::path p3(f3); boost::filesystem::remove(p3);

    return 0;
}