为什么 boost split 会导致 double free 或 corruption 问题

Why does boost split cause double free or corruption issue

我用 C++ 开发了一个 web 服务器,这里有一个函数,它导致了 coredump 问题,但我不知道为什么。

// Reports whether `uid` is the family owner or is listed in the
// comma-separated text held in m_familyAdmins.
bool MyClass::hasFamilyAdminPermission(uint32_t uid) {
    // Read lock covering m_familyOwner and m_familyAdmins (per the author's note).
    ReadMutex mutex(&m_mtx);

    // The owner is accepted without consulting the admin list.
    if (uid == m_familyOwner) {
        return true;
    }

    // Break the admin list into its comma-separated fields.
    std::vector<std::string> admins;
    boost::split(admins, m_familyAdmins, boost::is_any_of(","));

    // The comparison is textual: each field against the decimal form of uid.
    const std::string wanted = boost::lexical_cast<string>(uid);
    bool found = false;
    for (std::vector<std::string>::size_type i = 0; i < admins.size() && !found; ++i) {
        found = (admins[i] == wanted);
    }
    return found;
}

执行gdb ./myServer coredump_file后,得到如下输出:

warning: Unexpected size of section `.reg-xstate/8717' in core file.
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
Core was generated by `./myServer'.
Program terminated with signal SIGABRT, Aborted.

warning: Unexpected size of section `.reg-xstate/8717' in core file.
#0  0x00007f627b2c5428 in __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:54
54      ../sysdeps/unix/sysv/linux/raise.c: No such file or directory.
[Current thread is 1 (Thread 0x7f62277fe700 (LWP 8717))]
(gdb) where
#0  0x00007f627b2c5428 in __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:54
#1  0x00007f627b2c702a in __GI_abort () at abort.c:89
#2  0x00007f627b3077ea in __libc_message (do_abort=do_abort@entry=2, fmt=fmt@entry=0x7f627b420ed8 "*** Error in `%s': %s: 0x%s ***\n")
    at ../sysdeps/posix/libc_fatal.c:175
#3  0x00007f627b31037a in malloc_printerr (ar_ptr=<optimized out>, ptr=<optimized out>,
    str=0x7f627b420fa0 "double free or corruption (fasttop)", action=3) at malloc.c:5006
#4  _int_free (av=<optimized out>, p=<optimized out>, have_lock=0) at malloc.c:3867
#5  0x00007f627b31453c in __GI___libc_free (mem=<optimized out>) at malloc.c:2968
#6  0x00007f627be650b4 in std::basic_string<char, std::char_traits<char>, std::allocator<char> >::~basic_string() ()
   from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#7  0x000000000046acc1 in boost::as_literal<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > (r=...)
    at /usr/local/include/boost/range/as_literal.hpp:102
#8  boost::algorithm::iter_split<std::vector<std::string, std::allocator<std::string> >, std::string, boost::algorithm::detail::token_finderF<boost::algorithm::detail::is_any_ofF<char> > > (Result=..., Input="604679400,1430691907,2792989999",
    Finder=<error reading variable: DWARF-2 expression error: DW_OP_reg operations must be used either alone or in conjunction with DW_OP_piece or DW_OP_bit_piece.>) at /usr/local/include/boost/algorithm/string/iter_find.hpp:153
#9  0x0000000000464c17 in boost::algorithm::split<std::vector<std::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >, std::string, boost::algorithm::detail::is_any_ofF<char> > (eCompress=<optimized out>, Pred=..., Input=..., Result=...) at /usr/local/include/boost/algorithm/string/split.hpp:149
#10 server::myServer::MyClass::hasFamilyAdminPermission (this=0x7f621c066dd0, uid=<optimized out>) at MyClass.cpp:202
#11 0x0000000000435678 in server::myServer::ServerAPI::onSetMicDataReq (this=0x7fff6da7e750, uid=1430691907, req=0x7f621c094200)
    at ServerAPI.cpp:2291

看来 boost::split 导致了核心转储。

在 gdb 中,我做了如下操作:

(gdb) frame 10
#10 server::myServer::MyClass::hasFamilyAdminPermission (this=0x7f621c066dd0, uid=<optimized out>) at MyClass.cpp:202
202     MyClass.cpp: No such file or directory.
(gdb) print fields
 = std::vector of length 0, capacity 0

现在,fields 是一个空向量。

(gdb) frame 8
#8  boost::algorithm::iter_split<std::vector<std::string, std::allocator<std::string> >, std::string, boost::algorithm::detail::token_finderF<boost::algorithm::detail::is_any_ofF<char> > > (Result=..., Input="604679400,1430691907,2792989999",
    Finder=<error reading variable: DWARF-2 expression error: DW_OP_reg operations must be used either alone or in conjunction with DW_OP_piece or DW_OP_bit_piece.>) at /usr/local/include/boost/algorithm/string/iter_find.hpp:153
153     /usr/local/include/boost/algorithm/string/iter_find.hpp: No such file or directory.

这里输入的是"604679400,1430691907,2792989999",看起来还可以。

然而,当我执行 info locals 时(我还在第8帧),我得到了一个分段错误。

(gdb) info locals
Tmp = std::vector of length 1891892, capacity 1891893 = {<error reading variable Tmp (Cannot access memory at address 0x0)>
itBegin = {<boost::iterator_adaptor<boost::transform_iterator<boost::algorithm::detail::copy_iterator_rangeF<std::basic_string<char, std::char_traits<char>, std::allocator<char> >, __gnu_cxx::__normal_iterator<char*, std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >, boost::algorithm::split_iterator<__gnu_cxx::__normal_iterator<char*, std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >, boost::use_default, boost::use_default>, boost::algorithm::split_iterator<__gnu_cxx::__normal_iterator<char*, std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >, std::basic_string<char, std::char_traits<char>, std::allocator<char> >, boost::use_default, std::basic_string<char, std::char_traits<char>, std::allocator<char> >, boost::use_default>> = {<boost::iterator_facade<boost::transform_iterator<boost::algorithm::detail::copy_iterator_rangeF<std::basic_string<char, std::char_traits<char>, std::allocator<char> >, __gnu_cxx::__normal_iterator<char*, std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >, boost::algorithm::split_iterator<__gnu_cxx::__normal_iterator<char*, std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >, boost::use_default, boost::use_default>, std::basic_string<char, std::char_traits<char>, std::allocator<char> >, boost::forward_traversal_tag, std::basic_string<char, std::char_traits<char>, std::allocator<char> >, long>> = {<No data fields>},
    m_iterator = {<boost::iterator_facade<boost::algorithm::split_iterator<__gnu_cxx::__normal_iterator<char*, std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >, boost::iterator_range<__gnu_cxx::__normal_iterator<char*, std::basic_string<char, std::char_traits<char>, std::allocator<char> > > > const, boost::forward_traversal_tag, boost::iterator_range<__gnu_cxx::__normal_iterator<char*, std::basic_string<char, std::char_traits<char>, std::allocator<char> > > > const&, long>> = {<No data fields>}, <boost::algorithm::detail::find_iterator_base<__gnu_cxx::__normal_iterator<char*, std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >> = {
        m_Finder = {<boost::function_base> = {vtable = 0x89bd9dd2, functor = {obj_ptr = 0x7f627b31453c <__GI___libc_free+76>, type = {
                type = 0x7f627b31453c <__GI___libc_free+76>, const_qualified = false, volatile_qualified = false},
              func_ptr = 0x7f627b31453c <__GI___libc_free+76>, bound_memfunc_ptr = {
                memfunc_ptr = (void (boost::detail::function::X::*)(boost::detail::function::X * const,
    int)) 0x7f627b31453c <__GI___libc_free+76>, obj_ptr = 0xe8436c057b9e1300}, obj_ref = {obj_ptr = 0x7f627b31453c <__GI___libc_free+76>,
                is_const_qualified = false, is_volatile_qualified = false},
              data = 60 '<'}}, <std::binary_function<__gnu_cxx::__normal_iterator<char*, std::basic_string<char, std::char_traits<char>, std::allocator<char> > >, __gnu_cxx::__normal_iterator<char*, std::basic_string<char, std::char_traits<char>, std::allocator<char> > >, boost::iterator_range<__gnu_cxx::__normal_iterator<char*, std::basic_string<char, std::char_traits<char>, std::allocator<char> > > > >> = {<No data fields>},
/usr/bin/sudo: line 11: 75240 Segmentation fault      (core dumped) ${SUDO} $@

我不知道是什么导致了 coredump,也不知道为什么在执行 info locals 之后会出现段错误……

所以,boost::split 本身不会崩溃。您的程序在其他地方存在未定义行为(undefined behavior)。

不管怎样,您为什么每次都要解析字符串、分配一个字符串向量、再与一个临时字符串进行比较呢?您完全可以在整数域(integer domain)上完成这件事。

下面给出四种做法。先从一个简单的骨架开始:

#include <shared_mutex>
#include <string>

/// Holds the family owner uid and the admin list as raw text.
struct MyClass1 {
    /// Stores the owner uid and takes over the admin text by move.
    MyClass1(uint32_t owner, std::string admins)
        : m_familyOwner{owner}, m_familyAdmins{std::move(admins)} {}

    /// Permission query; only declared here.
    bool hasFamilyAdminPermission(uint32_t uid) const;

  private:
    // m_mtx guards m_familyOwner and m_familyAdmins. It is mutable so that
    // the const query above can still lock it.
    mutable std::shared_mutex m_mtx;
    uint32_t m_familyOwner;
    std::string m_familyAdmins;
};

1。比较整数,无分配

我会用Boost Spirit X3:

#include <boost/spirit/home/x3.hpp>
bool MyClass1::hasFamilyAdminPermission(uint32_t uid) const {
    std::shared_lock mutex(m_mtx);
    if (uid == m_familyOwner)
        return true;

    bool matched = false;
    auto element = boost::spirit::x3::uint32;
    auto check   = [uid, &matched](auto& ctx) {
        if (_attr(ctx) == uid) {
            matched    = true;
            _pass(ctx) = false; // short circuit for perf
        }
    };

    parse(begin(m_familyAdmins), end(m_familyAdmins), element[check] % ',');
    return matched;
}

这仍然在锁下做了很多工作,但肯定不会分配内存。此外,它确实会提前退出(early-out),如果管理员集合可能非常大,这会有所帮助。

2。比较文本,但没有分配

使用一个简洁的正则表达式,您可以在常量字符串(或字符串视图)上把数字当作文本来匹配。这里的开销是正则表达式自身的分配。但可以说,它要简单得多:

#include <regex>
/// Accepts the owner immediately; otherwise searches the admin text for the
/// decimal form of `uid` as a whole comma-delimited field.
bool MyClass2::hasFamilyAdminPermission(uint32_t uid) const {
    std::shared_lock guard(m_mtx);
    if (m_familyOwner == uid) {
        return true;
    }

    // The digits must be preceded by start-of-text or a comma and followed
    // by a comma or end-of-text. The match is purely textual.
    std::string pattern = "(^|,)";
    pattern += std::to_string(uid);
    pattern += "(,|$)";
    std::regex const wholeField(pattern);
    return regex_search(m_familyAdmins, wholeField);
}

3。在构造时解析一次

我们为什么要处理文本?我们可以将管理员放在一个集合中:

#include <set>
struct MyClass3 {
    MyClass3(uint32_t owner, std::string_view admins) : m_familyOwner(owner) {
        parse(admins.begin(), end(admins), boost::spirit::x3::uint32 % ',', m_familyAdmins);
    }
    bool hasFamilyAdminPermission(uint32_t uid) const;

  private:
    mutable std::shared_mutex m_mtx; // guards m_familyOwner and m_familyAdmins
    uint32_t                  m_familyOwner;
    std::set<uint32_t>        m_familyAdmins;
};

/// True when `uid` is the owner or is present in the parsed admin set.
bool MyClass3::hasFamilyAdminPermission(uint32_t uid) const {
    std::shared_lock guard(m_mtx);
    if (uid == m_familyOwner) {
        return true;
    }
    return m_familyAdmins.contains(uid);
}

那就更简单了。但是,set 中有一些可以优化的开销。

4。解析一次,无分配,速度

std::set 具有正确的语义。然而,对于小集合来说,缺乏引用局部性(locality of reference)以及相对较高的节点分配开销是令人遗憾的。我们可以替换为:

// Set of uids stored contiguously: a flat_set ordered by std::less<> whose
// underlying sequence is a small_vector declared with 10 in-place elements.
boost::container::flat_set< //
    uint32_t,               //
    std::less<>,            //
    boost::container::small_vector<uint32_t, 10>>
    m_familyAdmins;

这使得不超过 10 个元素的集合根本不需要分配内存,并且查找可以受益于连续存储。不过,到了这一步——除非您需要处理重复条目——您完全可以直接使用线性存储加线性搜索:

// Plain sequence of uids (no ordering or de-duplication), declared with 10
// in-place elements; intended to be searched linearly.
boost::container::small_vector<uint32_t, 10>
    m_familyAdmins;

组合演示

下面的演示展示了所有微妙的边缘情况。请注意,只有使用 X3 解析器时:

  • 对 comma-separated 字符串执行输入验证会很容易
  • 可靠地比较不同格式的 uid 数字会很容易

我偷偷输入了一个以 0 开头的数字(089 而不是 89),只是为了用 std::regex 方法强调这个问题。请注意,您的原始代码也存在同样的问题。

Live On Coliru/Compiler Explorer

#include <shared_mutex>
#include <string>

/// Holds the family owner uid and the admin list as raw text.
struct MyClass1 {
    /// Stores the owner uid and takes over the admin text by move.
    MyClass1(uint32_t owner, std::string admins)
        : m_familyOwner{owner}, m_familyAdmins{std::move(admins)} {}

    /// Permission query; only declared here.
    bool hasFamilyAdminPermission(uint32_t uid) const;

  private:
    // m_mtx guards m_familyOwner and m_familyAdmins. It is mutable so that
    // the const query above can still lock it.
    mutable std::shared_mutex m_mtx;
    uint32_t m_familyOwner;
    std::string m_familyAdmins;
};

#include <boost/spirit/home/x3.hpp>
bool MyClass1::hasFamilyAdminPermission(uint32_t uid) const {
    std::shared_lock mutex(m_mtx);
    if (uid == m_familyOwner)
        return true;

    bool matched = false;
    auto element = boost::spirit::x3::uint32;
    auto check   = [uid, &matched](auto& ctx) {
        if (_attr(ctx) == uid) {
            matched    = true;
            _pass(ctx) = false; // short circuit for perf
        }
    };

    parse(begin(m_familyAdmins), end(m_familyAdmins), element[check] % ',');
    return matched;
}

/// Variant that keeps the admin list as raw text and answers queries with a
/// regular-expression search over that text.
struct MyClass2 {
    /// Stores the owner uid and takes over the admin text by move.
    MyClass2(uint32_t owner, std::string admins)
        : m_familyOwner{owner}, m_familyAdmins{std::move(admins)} {}

    bool hasFamilyAdminPermission(uint32_t uid) const;

  private:
    // m_mtx guards m_familyOwner and m_familyAdmins.
    mutable std::shared_mutex m_mtx;
    uint32_t m_familyOwner;
    std::string m_familyAdmins;
};

#include <regex>
/// Accepts the owner immediately; otherwise searches the admin text for the
/// decimal form of `uid` as a whole comma-delimited field. The match is
/// textual, so a field written with a leading zero does not match.
bool MyClass2::hasFamilyAdminPermission(uint32_t uid) const {
    std::shared_lock guard(m_mtx);
    if (m_familyOwner == uid) {
        return true;
    }

    // Digits must sit between (start-of-text or comma) and (comma or end-of-text).
    std::string pattern = "(^|,)";
    pattern += std::to_string(uid);
    pattern += "(,|$)";
    std::regex const wholeField(pattern);
    return std::regex_search(m_familyAdmins, wholeField);
}

#include <set>
struct MyClass3 {
    MyClass3(uint32_t owner, std::string_view admins) : m_familyOwner(owner) {
        parse(admins.begin(), end(admins), boost::spirit::x3::uint32 % ',', m_familyAdmins);
    }
    bool hasFamilyAdminPermission(uint32_t uid) const;

    private:
    mutable std::shared_mutex m_mtx; // guards m_familyOwner and m_familyAdmins
    uint32_t                  m_familyOwner;
    std::set<uint32_t>        m_familyAdmins;
};

/// True when `uid` is the owner or is present in the parsed admin set.
bool MyClass3::hasFamilyAdminPermission(uint32_t uid) const {
    std::shared_lock guard(m_mtx);
    if (uid == m_familyOwner) {
        return true;
    }
    return m_familyAdmins.contains(uid);
}

#include <boost/container/flat_set.hpp>
#include <boost/container/small_vector.hpp>
struct MyClass4 {
    MyClass4(uint32_t owner, std::string_view admins) : m_familyOwner(owner) {
        parse(admins.begin(), end(admins), boost::spirit::x3::uint32 % ',', m_familyAdmins);
    }
    bool hasFamilyAdminPermission(uint32_t uid) const;

    private:
    mutable std::shared_mutex m_mtx; // guards m_familyOwner and m_familyAdmins
    uint32_t                  m_familyOwner;
#ifdef LINEAR_SEARCH
    // likely faster with small sets, anyways
    boost::container::small_vector<uint32_t, 10> m_familyAdmins;
#else
    boost::container::flat_set< //
        uint32_t,               //
        std::less<>,            //
        boost::container::small_vector<uint32_t, 10>>
            m_familyAdmins;
#endif
};

/// Returns true when `uid` is the family owner or is among the parsed admins.
///
/// The lookup matches the container MyClass4 selects with LINEAR_SEARCH:
///  - LINEAR_SEARCH defined: m_familyAdmins is a small_vector, which has no
///    contains(), so a linear std::find is used;
///  - otherwise: m_familyAdmins is a flat_set, so its own contains() is used.
///
/// The original condition was inverted (#ifndef), which called contains()
/// on the small_vector and therefore failed to compile with LINEAR_SEARCH.
bool MyClass4::hasFamilyAdminPermission(uint32_t uid) const {
    std::shared_lock mutex(m_mtx);
    if (uid == m_familyOwner) {
        return true;
    }
#ifdef LINEAR_SEARCH
    // Member begin()/end() are used because ADL would not reach std::begin
    // for a boost::container type.
    return std::find(m_familyAdmins.begin(), m_familyAdmins.end(), uid) != m_familyAdmins.end();
#else
    return m_familyAdmins.contains(uid);
#endif
}

#include <iostream>
/// Builds all four variants from the same owner and admin text, then prints
/// one row per queried uid showing each variant's answer.
int main() {
    // Same admin text for every variant; note the "089" entry with a leading zero.
    char const* const admins = "21,377,34,233,55,089,144";
    MyClass1 const mc1{42, admins};
    MyClass2 const mc2{42, admins};
    MyClass3 const mc3{42, admins};
    MyClass4 const mc4{42, admins};

    // Table header followed by a 40-character rule.
    std::cout << "uid\tdynamic\tregex\tset\tflat_set\n"
              << "\t(x3)\t-\t(x3)\t(x3)\n"
              << std::string(5 * 8, '-') << "\n";

    // Prints one row: the uid and the result from each variant.
    auto printRow = [&](uint32_t uid) {
        std::cout << uid << "\t" << std::boolalpha;
        std::cout << mc1.hasFamilyAdminPermission(uid) << "\t";
        std::cout << mc2.hasFamilyAdminPermission(uid) << "\t";
        std::cout << mc3.hasFamilyAdminPermission(uid) << "\t";
        std::cout << mc4.hasFamilyAdminPermission(uid) << "\n";
    };

    printRow(42); // the owner

    // https://en.wikipedia.org/wiki/Fibonacci_number
    // Query the Fibonacci numbers 3, 5, 8, ... that are below 800.
    int current = 3;
    int upcoming = 5;
    while (current < 800) {
        printRow(current);
        int const sum = current + upcoming;
        current  = upcoming;
        upcoming = sum;
    }
}

输出

uid     dynamic regex   set     flat_set
        (x3)    -       (x3)    (x3)
----------------------------------------
42      true    true    true    true
3       false   false   false   false
5       false   false   false   false
8       false   false   false   false
13      false   false   false   false
21      true    true    true    true
34      true    true    true    true
55      true    true    true    true
89      true    false   true    true
144     true    true    true    true
233     true    true    true    true
377     true    true    true    true
610     false   false   false   false