在 boost multi_index_container 中查找元素

Find element in boost multi_index_container

在我的代码中,我需要有一个功能来遍历所有元素并尽快检查是否有一些元素已经存在,所以我的选择落在了 boost 多索引容器上,我可以在其中使用 vector 和 unordered_set 界面同时用于我的 class 动物。问题是我无法通过 unordered_set 界面找到一些元素,因为我将键从 std::string 替换为 std::array 并调整了代码,但我没有知道我做错了什么吗?

代码: https://wandbox.org/permlink/dnCaEzYVdXkTFBGo

#include <array>
#include <algorithm>
#include <iostream>
#include <chrono>
#include <string>
#include <vector>
#include <list>
#include <map>
#include <set>
#include <unordered_map>
#include <unordered_set>
#include <memory>

#include <boost/multi_index_container.hpp>
#include <boost/multi_index/ordered_index.hpp>
#include <boost/multi_index/composite_key.hpp>
#include <boost/multi_index/hashed_index.hpp>
#include <boost/multi_index/sequenced_index.hpp>
#include <boost/multi_index/random_access_index.hpp>
#include <boost/multi_index/member.hpp>
#include <boost/multi_index/identity.hpp>

int constexpr elements_size{ 1'000'000 };

struct Animal
{
Animal(std::string name, std::string description, int leg, int age, double maxSpeed) noexcept :
    description_{std::move(description)}, leg_{leg}, age_{age}, maxSpeed_{maxSpeed}
{
    std::copy(name.begin(), name.end(), name_.data());
}

Animal(std::string const& name, std::string const& description) noexcept :
    description_{description}
{
    std::copy(name.begin(), name.end(), name_.data());
}

Animal(Animal&& animal) noexcept
{
    name_ = name_;
    description_ = std::move(animal).description_;
    leg_ = animal.leg_;
    age_ = animal.age_;
    maxSpeed_ = animal.maxSpeed_;
}

Animal(Animal const& animal) noexcept
{
    name_ = animal.name_;
    description_ = animal.description_;
    leg_ = animal.leg_;
    age_ = animal.age_;
    maxSpeed_ = animal.maxSpeed_;
}

Animal& operator=(Animal&& animal) noexcept
{
    name_ = name_;
    description_ = std::move(animal).description_;
    leg_ = animal.leg_;
    age_ = animal.age_;
    maxSpeed_ = animal.maxSpeed_;
    return *this;
}

Animal& operator=(Animal const& animal) noexcept
{
    name_ = animal.name_;
    description_ = animal.description_;
    leg_ = animal.leg_;
    age_ = animal.age_;
    maxSpeed_ = animal.maxSpeed_;
    return *this;
}

std::array<char, 50> name_;
std::string description_;
int leg_{0};
int age_{0};
double maxSpeed_{0.0};
};

struct Hasher
{
bool print_;
Hasher(bool print = false): print_{print} {}
std::size_t operator()(std::array<char, 50> const& name) const
{
    if (print_)
        std::cout << "array hash" << std::hash<std::string_view>{}(name.data()) << std::endl;
    return std::hash<std::string_view>{}(name.data());
}
std::size_t operator()(std::string const& name) const
{
    if (print_)
        std::cout << "string hash" << std::hash<std::string_view>{}(name.c_str()) << std::endl;
    return std::hash<std::string_view>{}(name.c_str());
}
std::size_t operator()(const char* name) const
{
    if (print_)
        std::cout << "char hash" << std::hash<std::string_view>{}(name) << std::endl;
    return std::hash<std::string_view>{}(name);
}
};

struct KeysComparator
{
bool operator()(std::array<char, 50> const& a1, std::array<char, 50> const& a2) const {return a1 == a2; }
template <typename T>
bool operator()(std::string const& n1, T const& t) const
{
    std::cout << "### value.name_" << t.value.name_.data() << ", n1: " << n1 << std::endl;
    return n1 == t.value.name_.data();
}
};

template<typename TimePoint>
std::string getElapsedTime(TimePoint const& start, TimePoint const& end)
{
auto micro = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
auto milli = std::chrono::duration_cast<std::chrono::milliseconds>(micro);
auto sec = std::chrono::duration_cast<std::chrono::seconds>(milli);

return {std::to_string(micro.count()) + " µs, " + std::to_string(milli.count()) + " ms, " + std::to_string(sec.count()) + " s"};
}

template<typename TimePoint>
void printStatistics(TimePoint const& emplace_start, TimePoint const& emplace_end, TimePoint const& iterate_start, TimePoint const& iterate_end,
TimePoint const& find_start, TimePoint const& find_end, intmax_t const sum, std::string target)
{
std::cout << "Elapsed time emplace: "  << getElapsedTime(emplace_start, emplace_end)
    << "  |  iterate: " << getElapsedTime(iterate_start, iterate_end)
    << "  |  find: " << getElapsedTime(find_start, find_end)
    << ", sum:" << sum << " , calculation for " << target << std::endl;
}

void test()
{
using namespace boost::multi_index;
using Animal_multi = multi_index_container<Animal, indexed_by<
    random_access<>,
    hashed_unique<
        composite_key<Animal, member<Animal, std::array<char, 50>, &Animal::name_>>,
        composite_key_hash<Hasher>,
        composite_key_equal_to<KeysComparator>>
>>;

Animal_multi container;
auto emplace_start = std::chrono::steady_clock::now();
for (auto i = 0; i < elements_size; ++i)
    container.emplace_back("the really long name of some animal 12345678910_" + std::to_string(i),
                           "bla bla bla bla bla bla bla bla bla bla bla bla bla", 4, i, i + 2);
auto emplace_end = std::chrono::steady_clock::now();

intmax_t sum{0};
auto iterate_start = std::chrono::steady_clock::now();
for (auto const& e : container)
    sum += e.age_;
auto iterate_end = std::chrono::steady_clock::now();

KeysComparator key_comparator;
Hasher hasher{true};
auto find_start = std::chrono::steady_clock::now();
auto &container_interface = container.get<1>();
auto isSucceeded = container_interface.count("the really long name of some animal 12345678910_" + std::to_string(elements_size-1),
                                             hasher, key_comparator);
if (not isSucceeded)
    std::cout << "WARN: Element has not been found." << std::endl;
auto find_end = std::chrono::steady_clock::now();

printStatistics(emplace_start, emplace_end, iterate_start, iterate_end, find_start, find_end, sum, "Animal_multi (boost multi_index)");
}

int main()
{
    test();
    return 0;
}

在移动构造函数中存在许多错误:

name_ = name_; // oops this does nothing at all

关注Rule Of Zero即可。这也会通知您 std::string copy/assignment 是 而不是 noexcept.

名称副本应该有长度限制:

std::copy_n(name.begin(), std::min(name.size(), name_.size()), name_.data());

此时我注意到一些事情可以解释你的问题:你没有以 NUL 终止,也没有确保数组是 0 初始化的。

宾果

的确,我看到了几行:

return std::hash<std::string_view>{}(name.data());

那是……UB!您的 string_view 可能包含不确定的数据,但更糟糕的是,您永远不会复制终止 NUL 字符。因此,std::string_view 将对长度不确定的字符串建模,该字符串可能会超过 50。

Read here about Nasal Demons (UB)

Such are the perils of skipping standard library types for the old C craft.

第一次挖掘

因此,这是具有 equal/better 特征的 class 的全部内容:

using Name = std::array<char, 50>;

struct Animal {
    Animal(std::string_view name, std::string description,
           int leg = 0, int age = 0, double maxSpeed = 0) noexcept 
        : name_{0}, // zero initialize!
          description_{std::move(description)},
          leg_{leg},
          age_{age},
          maxSpeed_{maxSpeed}
    {
        constexpr auto Capacity = std::tuple_size<Name>::value;
        constexpr auto MaxLen = Capacity - 1; // reserve NUL char

        assert(name.length() < MaxLen);
        std::copy_n(name.data(), std::min(name.length(), MaxLen), name_.data());
    }

    //Animal            ( Animal&&      animal  ) noexcept = default;
    //Animal            ( Animal const& animal  )          = default;
    //Animal& operator= ( Animal&&      animal  ) noexcept = default;
    //Animal& operator= ( Animal const& animal  )          = default;

    Name        name_;
    std::string description_;
    int         leg_{0};
    int         age_{0};
    double      maxSpeed_{0.0};
};

改善:FixedString

这只是 尖叫 更好的 Name 类型。怎么样,FixedString:

template <size_t N> struct FixedString {
    static_assert(N > 1); // require space for NUL char

    FixedString(std::string_view s) : data_{0} {
        if (s.length() >= N)
            throw std::length_error("FixedString");
        std::copy_n(s.data(), std::min(s.length(), N - 1), data());
    }

    std::string_view str() const { return { data(), size() }; }
    operator std::string_view() const { return str(); }

    auto data()   const { return data_.data(); }
    auto data()         { return data_.data(); }
    auto c_str()  const { return data_.data(); }
    auto c_str()        { return data_.data(); }

    auto begin() const { return data_.begin(); }
    auto end()   const { return data_.end();   }
    auto begin()       { return data_.begin(); }
    auto end()         { return data_.end();   }

    size_t size() const {
        auto terminator = std::memchr(data(), 0, data_.max_size());
        return terminator
            ? static_cast<char const*>(terminator) - data()
            : data_.max_size();
    };

    bool operator<(FixedString const& rhs)       const { return str() <  rhs.str(); }
    bool operator==(FixedString const& rhs)      const { return str() == rhs.str(); }
    bool operator!=(FixedString const& rhs)      const { return str() != rhs.str(); }
    // optimizations:
    bool operator<(std::string_view const& rhs)  const { return str() <  rhs.substr(0, N-1); }
    bool operator==(std::string_view const& rhs) const { return str() == rhs.substr(0, N-1); }
    bool operator!=(std::string_view const& rhs) const { return str() != rhs.substr(0, N-1); }

  private:
    std::array<char, N> data_;
};

现在您可以简单地

using Name = FixedString<50>;

并且您所有的 Name 都将神奇地(并且 安全地 )与字符串视图相互转换。

using Name = FixedString<50>;

struct Animal {
    Animal(std::string_view name, std::string description,
           int leg = 0, int age = 0, double maxSpeed = 0) noexcept 
        : name_{name}, description_{std::move(description)},
          leg_{leg}, age_{age}, maxSpeed_{maxSpeed}
    { }

    Name        name_;
    std::string description_;
    int         leg_{0};
    int         age_{0};
    double      maxSpeed_{0.0};
};

一切都通过正确的抽象来简化

这是我认为我在编程生涯中学到的最重要的一课:选择正确的抽象导致简单。在这里,我们蒸发了两个凌乱的帮手:

using Hasher         = std::hash<std::string_view>;
using KeysComparator = std::equal_to<Name>;

繁荣。他们做你做过的一切,但做得更好。

现在,缺失的元素

将整个事情简化为 this 之后,很明显 std::array<char, 50> 可以 永远不会 正确包含超过 50 个字符的名称。事实上,检查插入:

auto emplace_start = Now();

size_t duplicates = 0;
for (auto i = 0; i < elements_size; ++i) {
    auto [_, ok] = container.emplace_back(
        make_name(i), "bla bla bla bla bla bla bla bla bla bla bla bla bla",
        4, i, i + 2);

    if (!ok) ++duplicates;
}
if (duplicates) {
    std::cerr << "Oops, " << duplicates << " duplicate keys not inserted\n";
}

auto emplace_end = Now();

表明:

Oops, 999990 duplicate keys not inserted
Elapsed time emplace: 116.491ms iterate: 0.000145ms find: 0.000597ms, sum:45 , calculation for Animal_multi (boost multi_index)

At least, now you replaced Undefined Behaviour with constraint checks.

当然,只需增加名称容量即可解决问题:[https://wandbox.org/permlink/6AamJfXe76nYALfR)

using Name = FixedString<60>;

打印:

Elapsed time emplace: 594.475ms iterate: 18.6076ms find: 0.003138ms, sum:499999500000 , calculation for Animal_multi (boost multi_index)

或者你可以用一个过长的名字来添加 Name 结构:Live On Wandbox

FixedString(std::string_view s) : data_{0} {
    if (s.length() >= N)
        throw std::length_error("FixedString");
    std::copy_n(s.data(), std::min(s.length(), N - 1), data());
}

正确打印

terminate called after throwing an instance of 'std::length_error'
  what():  FixedString

Full Listing

此演示使用 FixedString<60> 来避免按键错误:

#include <boost/multi_index_container.hpp>
#include <boost/multi_index/hashed_index.hpp>
#include <boost/multi_index/random_access_index.hpp>
#include <boost/multi_index/member.hpp>
#include <iostream>
#include <iomanip>
#include <chrono>
using namespace std::chrono_literals;

int constexpr elements_size{ 1'000'000 };

template <size_t N> struct FixedString {
    static_assert(N > 1); // require space for NUL char

    FixedString(std::string_view s) : data_{0} {
        if (s.length() >= N)
            throw std::length_error("FixedString");
        std::copy_n(s.data(), std::min(s.length(), N - 1), data());
    }

    std::string_view str() const { return { data(), size() }; }
    operator std::string_view() const { return str(); }

    auto data()   const { return data_.data(); }
    auto data()         { return data_.data(); }
    auto c_str()  const { return data_.data(); }
    auto c_str()        { return data_.data(); }

    auto begin() const { return data_.begin(); }
    auto end()   const { return data_.end();   }
    auto begin()       { return data_.begin(); }
    auto end()         { return data_.end();   }

    size_t size() const {
        auto terminator = std::memchr(data(), 0, data_.max_size());
        return terminator
            ? static_cast<char const*>(terminator) - data()
            : data_.max_size();
    };

    bool operator<(std::string_view const& rhs)  const { return str() <  rhs.substr(0, N-1); }
    bool operator==(std::string_view const& rhs) const { return str() == rhs.substr(0, N-1); }
    bool operator!=(std::string_view const& rhs) const { return str() != rhs.substr(0, N-1); }
    bool operator<(FixedString const& rhs)       const { return str() <  rhs.str(); }
    bool operator==(FixedString const& rhs)      const { return str() == rhs.str(); }
    bool operator!=(FixedString const& rhs)      const { return str() != rhs.str(); }

  private:
    std::array<char, N> data_;
};

using Name = FixedString<60>;

struct Animal {
    Animal(std::string_view name, std::string description,
           int leg = 0, int age = 0, double maxSpeed = 0) noexcept 
        : name_{name}, description_{std::move(description)},
          leg_{leg}, age_{age}, maxSpeed_{maxSpeed}
    { }

    Name        name_;
    std::string description_;
    int         leg_{0};
    int         age_{0};
    double      maxSpeed_{0.0};
};

using Hasher         = std::hash<std::string_view>;
using KeysComparator = std::equal_to<Name>;

using Clock     = std::chrono::steady_clock;
using Duration  = Clock::duration;
static auto Now = Clock::now;

void printStatistics(Duration emplace, Duration iterate, Duration find,
                     intmax_t const sum, std::string target)
{
    std::cout << "Elapsed time"
        << " emplace: " << (emplace/1.0ms) << "ms"
        << " iterate: " << (iterate/1.0ms) << "ms"
        << " find: " << (find/1.0ms) << "ms"
        << ", sum:" << sum
        << " , calculation for " << target
        << std::endl;
}

void test() {
    namespace bmi = boost::multi_index;
    using Animal_multi = bmi::multi_index_container<Animal,
      bmi::indexed_by<
        bmi::random_access<>,
        bmi::hashed_unique<
            bmi::tag<struct by_name>,
            bmi::member<Animal, Name, &Animal::name_>, Hasher, KeysComparator>
        >
    >;

    Animal_multi container;

    auto make_name = [](size_t id) {
        return "the really long name of some animal 12345678910_" + std::to_string(id);
    };

    auto emplace_start = Now();

    size_t duplicates = 0;
    for (auto i = 0; i < elements_size; ++i) {
        auto [_, ok] = container.emplace_back(
            make_name(i), "bla bla bla bla bla bla bla bla bla bla bla bla bla",
            4, i, i + 2);

        if (!ok) ++duplicates;
    }
    if (duplicates) {
        std::cerr << "Oops, " << duplicates << " duplicate keys not inserted\n";
    }

    auto emplace_end = Now();

    intmax_t sum{ 0 };
    auto iterate_start = Now();
    for (auto const& e : container) {
        sum += e.age_;
    }
    auto iterate_end = Now();

    auto find_start = Now();
    {
        auto& name_idx = container.get<by_name>();
        auto last_key = make_name(elements_size - 1);
        if (name_idx.count(std::string_view(last_key)) == 0u) {
            std::cout << "WARN: Element has not been found." << std::endl;
        }
    }
    auto find_end = Now();

    printStatistics(
            emplace_end - emplace_start,
            iterate_end - iterate_start,
            find_end - find_start, sum,
            "Animal_multi (boost multi_index)");
}

int main() { test(); }