C++ 上的 B-Tree 实现有内存泄漏?

B-Tree implementation on C++ has a memory leak?

我有这个 B-Tree 实现,我正在使用 search() 和 insert() 对其进行测试。测试基本上是这样的:

void function(){
    BTree b16(16);
    // do lots of inserts and searchs on b16
}
for(many_times){
    function();
}

如果我没理解错的话,在function()的每次迭代之后,b16应该被销毁。然而,在 ~250 次迭代后,我得到一个 bad_alloc 错误,这意味着我有内存泄漏。

析构函数有问题吗?这是实现:

#include <iostream>
#include <vector>
using namespace std;
typedef unsigned int uint;

class BNode{
    private:
        uint *keys;
        int B;
        BNode **C;
        int n;
        bool leaf;

    public:
        BNode(int temp, bool bool_leaf);
        ~BNode();
        void insertNonFull(uint k);
        void splitChild(int i, BNode *y);
        void traverse();
        BNode *search(uint k);
        friend class BTree;
};

class BTree{
    private:
        BNode *root;
        int B;
    public:
        BTree(int temp);
        ~BTree();
        BNode *search(uint k);
        int search_bool(uint k);
        void insert(uint k);
};

BNode::BNode(int t1, bool leaf1) {
    B = t1;
    leaf = leaf1;
    keys = new uint[2*B - 1];
    C = new BNode *[2*B];
    n = 0;
}

BNode::~BNode(){
    delete[] keys;
    delete[] C;
}

BNode *BNode::search(uint k){
    int i = 0;
    while (i < n && k > keys[i]){
        i++;
    }
    if(keys[i] == k){
        return this;
    }
    if (leaf == true){
        return NULL;
    }
    return C[i]->search(k);
}

void BTree::insert(uint k){
    if (root == NULL) {
        root = new BNode(B, true);
        root->keys[0] = k;
        root->n = 1;
    } 
    else{
        if (root->n == 2*B - 1){
            BNode *s = new BNode(B, false);
            s->C[0] = root;
            s->splitChild(0, root);
            int i = 0;
            if (s->keys[0] < k){
                i++;
            }
            s->C[i]->insertNonFull(k);
            root = s;
        } 
        else{
            root->insertNonFull(k);
        }
    }
}

void BNode::insertNonFull(uint k) {
    int i = n - 1;
    if (leaf == true) {
        while (i>=0 && keys[i] > k) {
            keys[i+1] = keys[i];
            i--;
        }
        keys[i+1] = k;
        n = n + 1;
    } 
    else {
        while (i>=0 && keys[i] > k){
            i--;
        }
        if (C[i+1]->n == 2*B-1) {
            splitChild(i+1, C[i+1]);
            if (keys[i + 1] < k){
                i++;
            }
        }
        C[i+1]->insertNonFull(k);
    }
}

void BNode::splitChild(int i, BNode *y) {
    BNode *z = new BNode(y->B, y->leaf);
    z->n = B - 1;
    for (int j = 0; j < B - 1; j++){
        z->keys[j] = y->keys[j+B];
    }
    if (!y->leaf){
        for (int j = 0; j < B; j++)
        z->C[j] = y->C[j + B];
    }
    y->n = B - 1;
    for(int j=n; j >= i+1; j--){
        C[j+1] = C[j];
    }
    C[i + 1] = z;
    for (int j = n - 1; j >= i; j--){
        keys[j+1] = keys[j];
    }
    keys[i] = y->keys[B - 1];
    n = n + 1;
}

BTree::BTree(int temp){
    root = NULL;
    B = temp;
}

BTree::~BTree(){
    delete root;
}

BNode *BTree::search(uint k){
    return (root == NULL) ? NULL : root->search(k);
}

int BTree::search_bool(uint k){
    if(search(k) != NULL){
        return true;
    }
    else{
        return false;
    }
}

提前致谢。

所以,简单的诊断是

delete[] C;

只删除数组,不删除数组包含的节点。所以,(a) 确保它们是正确的零初始化的 (b) 也删除它们:

BNode** C    = new BNode* [2 * B] { 0 };

// in the destructor:
for (int i = 0; i < 2 * B; ++i)
    delete C[i];
delete[] C;

但是。

这不能很好地工作,因为节点可以拆分。将一些节点从一个节点的 C 移动到另一个节点的 C 后,您 运行 进入双重释放。因此,当您拆分节点时,您必须确保再次将移出的 C 元素设置为 NULL:

            z->C[j] = y->C[j + B];
            y->C[j + B] = nullptr;

现在,这个程序 运行 在 valgrind、ubsan 和 asan 下是干净的,没有泄漏:

Live On Coliru

#include <iostream>
#include <vector>
using uint = unsigned;

class BNode {
  private:
    int     B;
    bool    leaf;
    uint*   keys = new uint[2 * B - 1]{0};
    BNode** C    = new BNode* [2 * B] { 0 };
    int     n    = 0;

  public:
    BNode(int t1, bool leaf1) : B(t1), leaf(leaf1) {}

    ~BNode()
    {
        delete[] keys;
        for (int i = 0; i < 2 * B; ++i)
            delete C[i];
        delete[] C;
    }

    BNode const* search(uint k) const
    {
        int i = 0;
        while (i < n && k > keys[i]) {
            i++;
        }
        if (keys[i] == k) {
            return this;
        }
        return leaf ? nullptr : C[i]->search(k);
    }

    void insertNonFull(uint k)
    {
        int i = n - 1;
        if (leaf == true) {
            while (i >= 0 && keys[i] > k) {
                keys[i + 1] = keys[i];
                i--;
            }
            keys[i + 1] = k;
            n           = n + 1;
        } else {
            while (i >= 0 && keys[i] > k) {
                i--;
            }
            if (C[i + 1]->n == 2 * B - 1) {
                splitChild(i + 1, C[i + 1]);
                if (keys[i + 1] < k) {
                    i++;
                }
            }
            C[i + 1]->insertNonFull(k);
        }
    }

    void splitChild(int i, BNode* y)
    {
        BNode* z = new BNode(y->B, y->leaf);
        z->n     = B - 1;
        for (int j = 0; j < B - 1; j++) {
            z->keys[j] = y->keys[j + B];
        }
        if (!y->leaf) {
            for (int j = 0; j < B; j++) {
                z->C[j]     = y->C[j + B];
                y->C[j + B] = nullptr;
            }
        }
        y->n = B - 1;
        for (int j = n; j >= i + 1; j--) {
            C[j + 1] = C[j];
        }
        C[i + 1] = z;
        for (int j = n - 1; j >= i; j--) {
            keys[j + 1] = keys[j];
        }
        keys[i] = y->keys[B - 1];
        n       = n + 1;
    }

    friend class BTree;
};

class BTree {
  private:
    BNode* root = nullptr;
    int    B;

  public:
    BTree(int temp)
    {
        root = nullptr;
        B    = temp;
    }

    ~BTree() { delete root; }

    BNode const* search(uint k) const
    {
        return (root == nullptr) ? nullptr : root->search(k);
    }

    bool search_bool(uint k) const { return search(k) != nullptr; }

    void insert(uint k)
    {
        if (!root) {
            root          = new BNode(B, true);
            root->keys[0] = k;
            root->n       = 1;
        } else {
            if (root->n == 2 * B - 1) {
                BNode* s = new BNode(B, false);
                s->C[0]  = root;
                s->splitChild(0, root);
                int i = 0;
                if (s->keys[0] < k) {
                    i++;
                }
                s->C[i]->insertNonFull(k);
                root = s;
            } else {
                root->insertNonFull(k);
            }
        }
    }
};

int main()
{
    for (int b = 8; b < 17; ++b) {
        BTree tree(b);
        for (int i = 0; i < 100'000; ++i)
            tree.insert(rand() % 1000);
    }
}

收盘中

在这里获得算法的荣誉。这并不容易。

正如您在我的 cleanup/review 中看到的那样,我加强了很多东西,主要是围绕初始化。这是一个重要的习惯。我实际上不能排除不这样做会暴露其他沉睡的虫子。

还要注意增加的常量正确性。

另外,正如其他人所说,更喜欢智能指针和现代 C++ 技术。令人惊奇的是,仅使用 std::arrayunique_ptr 等方式,错误率就会大大降低。例如,splitChild 中移动节点的错误永远不会编译,因为你 必须 明确地移动分配 unique_ptr

奖金

更现代的 C++ 示例:

  • 没有任何new/delete
  • 没有任何原始指针
  • 不再需要手动析构函数(甚至不需要构造函数,真的)
  • 编译器检查了移动语义,不错!
  • 静态已知的 B 因子,因此一切的性能都提高了 10 倍,同时仍然可调
  • 通用键(元素)类型,因此您现在可以存储 std::stringdouble 等等
  • 通用比较器,因此您可以按其他顺序对键进行排序(例如 Btree<std::string, 16, std::greater<> > 以降序而不是升序存储键)。
  • 没有泄漏!

Live On Coliru

#include <array>
#include <algorithm>
#include <memory>
#include <iostream>

template <typename T, unsigned B = 16, typename Cmp = std::less<T>>
class BTree {
  private:
    static constexpr unsigned MaxKeys     = 2 * B - 1;
    static constexpr unsigned MaxChildren = 2 * B;

    struct BNode;
    using NodePtr = std::unique_ptr<BNode>;

    struct BNode {
        bool _leaf;
        int  _n = 0;

        std::array<T, MaxKeys>           _keys{};
        std::array<NodePtr, MaxChildren> _children{};

        BNode(bool leaf1) : _leaf(leaf1) {}

        BNode const* search(T k, Cmp cmp) const
        {
            int i = 0;
            while (i < _n && cmp(_keys[i], k)) {
                i++;
            }
            if (_keys[i] == k) {
                return this;
            }
            return _leaf ? nullptr : _children[i]->search(k, cmp);
        }

        void insertNonFull(T k, Cmp cmp)
        {
            int i = _n - 1;
            if (_leaf == true) {
                while (i >= 0 && cmp(k, _keys[i])) {
                    _keys[i + 1] = _keys[i];
                    i--;
                }
                _keys[i + 1] = k;
                _n           = _n + 1;
            } else {
                while (i >= 0 && cmp(k, _keys[i])) {
                    i--;
                }
                if (_children[i + 1]->_n == MaxKeys) {
                    splitChild(i + 1, *_children[i + 1]);
                    if (cmp(_keys[i + 1], k)) {
                        i++;
                    }
                }
                _children[i + 1]->insertNonFull(k, cmp);
            }
        }

        void splitChild(int i, BNode& y)
        {
            NodePtr z = std::make_unique<BNode>(y._leaf);
            z->_n     = B - 1;
            for (unsigned j = 0; j < B - 1; j++) {
                z->_keys[j] = y._keys[j + B];
            }
            if (!y._leaf) {
                for (unsigned j = 0; j < B; j++) {
                    z->_children[j] = std::move(y._children[j + B]);
                }
            }
            y._n = B - 1;
            for (int j = _n; j >= i + 1; j--) {
                _children[j + 1] = std::move(_children[j]);
            }
            _children[i + 1] = std::move(z);
            for (int j = _n - 1; j >= i; j--) {
                _keys[j + 1] = _keys[j];
            }
            _keys[i] = y._keys[B - 1];
            _n       = _n + 1;
        }
    };

    NodePtr root = nullptr;
    Cmp     _cmp{};

  public:
    BTree(Cmp cmp = {}) : _cmp(std::move(cmp)) {}

    BNode const* search(T k) const {
        return root ? root->search(k, _cmp) : nullptr;
    }

    bool search_bool(T k) const { return search(k) != nullptr; }

    void insert(T k)
    {
        if (!root) {
            root           = std::make_unique<BNode>(true);
            root->_keys[0] = k;
            root->_n       = 1;
        } else {
            if (root->_n == MaxKeys) {
                NodePtr s       = std::make_unique<BNode>(false);
                s->splitChild(0, *root);
                s->_children[0] = std::move(root);
                int i = 0;
                if (_cmp(s->_keys[0], k)) {
                    i++;
                }
                s->_children[i]->insertNonFull(k, _cmp);
                root = std::move(s);
            } else {
                root->insertNonFull(k, _cmp);
            }
        }
    }
};

int main()
{
    using Asc  = std::less<>;
    using Desc = std::greater<>;
    BTree<double,   8,  Asc>  b8;
    BTree<int,      9,  Desc> b9;
    BTree<unsigned, 10, Asc>  b10;
    BTree<size_t,   11, Desc> b11;
    BTree<double,   12, Asc>  b12;
    BTree<int,      13, Desc> b13;
    BTree<unsigned, 14, Asc>  b14;
    BTree<size_t,   15, Desc> b15;
    BTree<int,      16> b16; // default is ascending

    for (int i = 0; i < 100'000; ++i) {
        b8.insert(rand() % 10000);
        b9.insert(rand() % 10000);
        b10.insert(rand() % 10000);
        b11.insert(rand() % 10000);
        b12.insert(rand() % 10000);
        b13.insert(rand() % 10000);
        b14.insert(rand() % 10000);
        b15.insert(rand() % 10000);
        b16.insert(rand() % 10000);
    }

    {
        struct NC {
            int v;
            bool operator==(NC const& o) const { return v == o.v; }
        };
        struct CMP {
            bool operator()(NC const& a, NC const& b) const { return a.v < b.v; }
        };

        BTree<NC, 8, CMP> b8;
        b8.insert({42});
        bool ok = b8.search_bool({42});
    }
}