C++ 上的 B-Tree 实现有内存泄漏?
B-Tree implementation on C++ has a memory leak?
我有这个 B-Tree 实现,我正在使用 search() 和 insert() 对其进行测试。测试基本上是这样的:
void function(){
BTree b16(16);
// do lots of inserts and searchs on b16
}
for(many_times){
function();
}
如果我没理解错的话,在function()的每次迭代之后,b16应该被销毁。然而,在 ~250 次迭代后,我得到一个 bad_alloc 错误,这意味着我有内存泄漏。
析构函数有问题吗?这是实现:
#include <iostream>
#include <vector>
using namespace std;
typedef unsigned int uint;
class BNode{
private:
uint *keys;
int B;
BNode **C;
int n;
bool leaf;
public:
BNode(int temp, bool bool_leaf);
~BNode();
void insertNonFull(uint k);
void splitChild(int i, BNode *y);
void traverse();
BNode *search(uint k);
friend class BTree;
};
class BTree{
private:
BNode *root;
int B;
public:
BTree(int temp);
~BTree();
BNode *search(uint k);
int search_bool(uint k);
void insert(uint k);
};
BNode::BNode(int t1, bool leaf1) {
B = t1;
leaf = leaf1;
keys = new uint[2*B - 1];
C = new BNode *[2*B];
n = 0;
}
BNode::~BNode(){
delete[] keys;
delete[] C;
}
BNode *BNode::search(uint k){
int i = 0;
while (i < n && k > keys[i]){
i++;
}
if(keys[i] == k){
return this;
}
if (leaf == true){
return NULL;
}
return C[i]->search(k);
}
void BTree::insert(uint k){
if (root == NULL) {
root = new BNode(B, true);
root->keys[0] = k;
root->n = 1;
}
else{
if (root->n == 2*B - 1){
BNode *s = new BNode(B, false);
s->C[0] = root;
s->splitChild(0, root);
int i = 0;
if (s->keys[0] < k){
i++;
}
s->C[i]->insertNonFull(k);
root = s;
}
else{
root->insertNonFull(k);
}
}
}
void BNode::insertNonFull(uint k) {
int i = n - 1;
if (leaf == true) {
while (i>=0 && keys[i] > k) {
keys[i+1] = keys[i];
i--;
}
keys[i+1] = k;
n = n + 1;
}
else {
while (i>=0 && keys[i] > k){
i--;
}
if (C[i+1]->n == 2*B-1) {
splitChild(i+1, C[i+1]);
if (keys[i + 1] < k){
i++;
}
}
C[i+1]->insertNonFull(k);
}
}
void BNode::splitChild(int i, BNode *y) {
BNode *z = new BNode(y->B, y->leaf);
z->n = B - 1;
for (int j = 0; j < B - 1; j++){
z->keys[j] = y->keys[j+B];
}
if (!y->leaf){
for (int j = 0; j < B; j++)
z->C[j] = y->C[j + B];
}
y->n = B - 1;
for(int j=n; j >= i+1; j--){
C[j+1] = C[j];
}
C[i + 1] = z;
for (int j = n - 1; j >= i; j--){
keys[j+1] = keys[j];
}
keys[i] = y->keys[B - 1];
n = n + 1;
}
BTree::BTree(int temp){
root = NULL;
B = temp;
}
BTree::~BTree(){
delete root;
}
BNode *BTree::search(uint k){
return (root == NULL) ? NULL : root->search(k);
}
int BTree::search_bool(uint k){
if(search(k) != NULL){
return true;
}
else{
return false;
}
}
提前致谢。
所以,简单的诊断是
delete[] C;
只删除数组,不删除数组包含的节点。所以,(a) 确保它们是正确的零初始化的 (b) 也删除它们:
BNode** C = new BNode* [2 * B] { 0 };
// in the destructor:
for (int i = 0; i < 2 * B; ++i)
delete C[i];
delete[] C;
但是。
这不能很好地工作,因为节点可以拆分。将一些节点从一个节点的 C 移动到另一个节点的 C 后,您 运行 进入双重释放。因此,当您拆分节点时,您必须确保再次将移出的 C 元素设置为 NULL:
z->C[j] = y->C[j + B];
y->C[j + B] = nullptr;
现在,这个程序 运行 在 valgrind、ubsan 和 asan 下是干净的,没有泄漏:
#include <iostream>
#include <vector>
using uint = unsigned;
class BNode {
private:
int B;
bool leaf;
uint* keys = new uint[2 * B - 1]{0};
BNode** C = new BNode* [2 * B] { 0 };
int n = 0;
public:
BNode(int t1, bool leaf1) : B(t1), leaf(leaf1) {}
~BNode()
{
delete[] keys;
for (int i = 0; i < 2 * B; ++i)
delete C[i];
delete[] C;
}
BNode const* search(uint k) const
{
int i = 0;
while (i < n && k > keys[i]) {
i++;
}
if (keys[i] == k) {
return this;
}
return leaf ? nullptr : C[i]->search(k);
}
void insertNonFull(uint k)
{
int i = n - 1;
if (leaf == true) {
while (i >= 0 && keys[i] > k) {
keys[i + 1] = keys[i];
i--;
}
keys[i + 1] = k;
n = n + 1;
} else {
while (i >= 0 && keys[i] > k) {
i--;
}
if (C[i + 1]->n == 2 * B - 1) {
splitChild(i + 1, C[i + 1]);
if (keys[i + 1] < k) {
i++;
}
}
C[i + 1]->insertNonFull(k);
}
}
void splitChild(int i, BNode* y)
{
BNode* z = new BNode(y->B, y->leaf);
z->n = B - 1;
for (int j = 0; j < B - 1; j++) {
z->keys[j] = y->keys[j + B];
}
if (!y->leaf) {
for (int j = 0; j < B; j++) {
z->C[j] = y->C[j + B];
y->C[j + B] = nullptr;
}
}
y->n = B - 1;
for (int j = n; j >= i + 1; j--) {
C[j + 1] = C[j];
}
C[i + 1] = z;
for (int j = n - 1; j >= i; j--) {
keys[j + 1] = keys[j];
}
keys[i] = y->keys[B - 1];
n = n + 1;
}
friend class BTree;
};
class BTree {
private:
BNode* root = nullptr;
int B;
public:
BTree(int temp)
{
root = nullptr;
B = temp;
}
~BTree() { delete root; }
BNode const* search(uint k) const
{
return (root == nullptr) ? nullptr : root->search(k);
}
bool search_bool(uint k) const { return search(k) != nullptr; }
void insert(uint k)
{
if (!root) {
root = new BNode(B, true);
root->keys[0] = k;
root->n = 1;
} else {
if (root->n == 2 * B - 1) {
BNode* s = new BNode(B, false);
s->C[0] = root;
s->splitChild(0, root);
int i = 0;
if (s->keys[0] < k) {
i++;
}
s->C[i]->insertNonFull(k);
root = s;
} else {
root->insertNonFull(k);
}
}
}
};
int main()
{
for (int b = 8; b < 17; ++b) {
BTree tree(b);
for (int i = 0; i < 100'000; ++i)
tree.insert(rand() % 1000);
}
}
收盘中
在这里获得算法的荣誉。这并不容易。
正如您在我的 cleanup/review 中看到的那样,我加强了很多东西,主要是围绕初始化。这是一个重要的习惯。我实际上不能排除不这样做会暴露其他沉睡的虫子。
还要注意增加的常量正确性。
另外,正如其他人所说,更喜欢智能指针和现代 C++ 技术。令人惊奇的是,仅使用 std::array
、unique_ptr
等方式,错误率就会大大降低。例如,splitChild
中移动节点的错误永远不会编译,因为你 必须 明确地移动分配 unique_ptr
奖金
更现代的 C++ 示例:
- 没有任何new/delete
- 没有任何原始指针
- 不再需要手动析构函数(甚至不需要构造函数,真的)
- 编译器检查了移动语义,不错!
- 静态已知的 B 因子,因此一切的性能都提高了 10 倍,同时仍然可调
- 通用键(元素)类型,因此您现在可以存储
std::string
、double
等等
- 通用比较器,因此您可以按其他顺序对键进行排序(例如
Btree<std::string, 16, std::greater<> >
以降序而不是升序存储键)。
- 没有泄漏!
#include <array>
#include <algorithm>
#include <memory>
#include <iostream>
template <typename T, unsigned B = 16, typename Cmp = std::less<T>>
class BTree {
private:
static constexpr unsigned MaxKeys = 2 * B - 1;
static constexpr unsigned MaxChildren = 2 * B;
struct BNode;
using NodePtr = std::unique_ptr<BNode>;
struct BNode {
bool _leaf;
int _n = 0;
std::array<T, MaxKeys> _keys{};
std::array<NodePtr, MaxChildren> _children{};
BNode(bool leaf1) : _leaf(leaf1) {}
BNode const* search(T k, Cmp cmp) const
{
int i = 0;
while (i < _n && cmp(_keys[i], k)) {
i++;
}
if (_keys[i] == k) {
return this;
}
return _leaf ? nullptr : _children[i]->search(k, cmp);
}
void insertNonFull(T k, Cmp cmp)
{
int i = _n - 1;
if (_leaf == true) {
while (i >= 0 && cmp(k, _keys[i])) {
_keys[i + 1] = _keys[i];
i--;
}
_keys[i + 1] = k;
_n = _n + 1;
} else {
while (i >= 0 && cmp(k, _keys[i])) {
i--;
}
if (_children[i + 1]->_n == MaxKeys) {
splitChild(i + 1, *_children[i + 1]);
if (cmp(_keys[i + 1], k)) {
i++;
}
}
_children[i + 1]->insertNonFull(k, cmp);
}
}
void splitChild(int i, BNode& y)
{
NodePtr z = std::make_unique<BNode>(y._leaf);
z->_n = B - 1;
for (unsigned j = 0; j < B - 1; j++) {
z->_keys[j] = y._keys[j + B];
}
if (!y._leaf) {
for (unsigned j = 0; j < B; j++) {
z->_children[j] = std::move(y._children[j + B]);
}
}
y._n = B - 1;
for (int j = _n; j >= i + 1; j--) {
_children[j + 1] = std::move(_children[j]);
}
_children[i + 1] = std::move(z);
for (int j = _n - 1; j >= i; j--) {
_keys[j + 1] = _keys[j];
}
_keys[i] = y._keys[B - 1];
_n = _n + 1;
}
};
NodePtr root = nullptr;
Cmp _cmp{};
public:
BTree(Cmp cmp = {}) : _cmp(std::move(cmp)) {}
BNode const* search(T k) const {
return root ? root->search(k, _cmp) : nullptr;
}
bool search_bool(T k) const { return search(k) != nullptr; }
void insert(T k)
{
if (!root) {
root = std::make_unique<BNode>(true);
root->_keys[0] = k;
root->_n = 1;
} else {
if (root->_n == MaxKeys) {
NodePtr s = std::make_unique<BNode>(false);
s->splitChild(0, *root);
s->_children[0] = std::move(root);
int i = 0;
if (_cmp(s->_keys[0], k)) {
i++;
}
s->_children[i]->insertNonFull(k, _cmp);
root = std::move(s);
} else {
root->insertNonFull(k, _cmp);
}
}
}
};
int main()
{
using Asc = std::less<>;
using Desc = std::greater<>;
BTree<double, 8, Asc> b8;
BTree<int, 9, Desc> b9;
BTree<unsigned, 10, Asc> b10;
BTree<size_t, 11, Desc> b11;
BTree<double, 12, Asc> b12;
BTree<int, 13, Desc> b13;
BTree<unsigned, 14, Asc> b14;
BTree<size_t, 15, Desc> b15;
BTree<int, 16> b16; // default is ascending
for (int i = 0; i < 100'000; ++i) {
b8.insert(rand() % 10000);
b9.insert(rand() % 10000);
b10.insert(rand() % 10000);
b11.insert(rand() % 10000);
b12.insert(rand() % 10000);
b13.insert(rand() % 10000);
b14.insert(rand() % 10000);
b15.insert(rand() % 10000);
b16.insert(rand() % 10000);
}
{
struct NC {
int v;
bool operator==(NC const& o) const { return v == o.v; }
};
struct CMP {
bool operator()(NC const& a, NC const& b) const { return a.v < b.v; }
};
BTree<NC, 8, CMP> b8;
b8.insert({42});
bool ok = b8.search_bool({42});
}
}
我有这个 B-Tree 实现,我正在使用 search() 和 insert() 对其进行测试。测试基本上是这样的:
void function(){
BTree b16(16);
// do lots of inserts and searchs on b16
}
for(many_times){
function();
}
如果我没理解错的话,在function()的每次迭代之后,b16应该被销毁。然而,在 ~250 次迭代后,我得到一个 bad_alloc 错误,这意味着我有内存泄漏。
析构函数有问题吗?这是实现:
#include <iostream>
#include <vector>
using namespace std;
typedef unsigned int uint;
class BNode{
private:
uint *keys;
int B;
BNode **C;
int n;
bool leaf;
public:
BNode(int temp, bool bool_leaf);
~BNode();
void insertNonFull(uint k);
void splitChild(int i, BNode *y);
void traverse();
BNode *search(uint k);
friend class BTree;
};
class BTree{
private:
BNode *root;
int B;
public:
BTree(int temp);
~BTree();
BNode *search(uint k);
int search_bool(uint k);
void insert(uint k);
};
BNode::BNode(int t1, bool leaf1) {
B = t1;
leaf = leaf1;
keys = new uint[2*B - 1];
C = new BNode *[2*B];
n = 0;
}
BNode::~BNode(){
delete[] keys;
delete[] C;
}
BNode *BNode::search(uint k){
int i = 0;
while (i < n && k > keys[i]){
i++;
}
if(keys[i] == k){
return this;
}
if (leaf == true){
return NULL;
}
return C[i]->search(k);
}
void BTree::insert(uint k){
if (root == NULL) {
root = new BNode(B, true);
root->keys[0] = k;
root->n = 1;
}
else{
if (root->n == 2*B - 1){
BNode *s = new BNode(B, false);
s->C[0] = root;
s->splitChild(0, root);
int i = 0;
if (s->keys[0] < k){
i++;
}
s->C[i]->insertNonFull(k);
root = s;
}
else{
root->insertNonFull(k);
}
}
}
void BNode::insertNonFull(uint k) {
int i = n - 1;
if (leaf == true) {
while (i>=0 && keys[i] > k) {
keys[i+1] = keys[i];
i--;
}
keys[i+1] = k;
n = n + 1;
}
else {
while (i>=0 && keys[i] > k){
i--;
}
if (C[i+1]->n == 2*B-1) {
splitChild(i+1, C[i+1]);
if (keys[i + 1] < k){
i++;
}
}
C[i+1]->insertNonFull(k);
}
}
void BNode::splitChild(int i, BNode *y) {
BNode *z = new BNode(y->B, y->leaf);
z->n = B - 1;
for (int j = 0; j < B - 1; j++){
z->keys[j] = y->keys[j+B];
}
if (!y->leaf){
for (int j = 0; j < B; j++)
z->C[j] = y->C[j + B];
}
y->n = B - 1;
for(int j=n; j >= i+1; j--){
C[j+1] = C[j];
}
C[i + 1] = z;
for (int j = n - 1; j >= i; j--){
keys[j+1] = keys[j];
}
keys[i] = y->keys[B - 1];
n = n + 1;
}
BTree::BTree(int temp){
root = NULL;
B = temp;
}
BTree::~BTree(){
delete root;
}
BNode *BTree::search(uint k){
return (root == NULL) ? NULL : root->search(k);
}
int BTree::search_bool(uint k){
if(search(k) != NULL){
return true;
}
else{
return false;
}
}
提前致谢。
所以,简单的诊断是
delete[] C;
只删除数组,不删除数组包含的节点。所以,(a) 确保它们是正确的零初始化的 (b) 也删除它们:
BNode** C = new BNode* [2 * B] { 0 };
// in the destructor:
for (int i = 0; i < 2 * B; ++i)
delete C[i];
delete[] C;
但是。
这不能很好地工作,因为节点可以拆分。将一些节点从一个节点的 C 移动到另一个节点的 C 后,您 运行 进入双重释放。因此,当您拆分节点时,您必须确保再次将移出的 C 元素设置为 NULL:
z->C[j] = y->C[j + B];
y->C[j + B] = nullptr;
现在,这个程序 运行 在 valgrind、ubsan 和 asan 下是干净的,没有泄漏:
#include <iostream>
#include <vector>
using uint = unsigned;
class BNode {
private:
int B;
bool leaf;
uint* keys = new uint[2 * B - 1]{0};
BNode** C = new BNode* [2 * B] { 0 };
int n = 0;
public:
BNode(int t1, bool leaf1) : B(t1), leaf(leaf1) {}
~BNode()
{
delete[] keys;
for (int i = 0; i < 2 * B; ++i)
delete C[i];
delete[] C;
}
BNode const* search(uint k) const
{
int i = 0;
while (i < n && k > keys[i]) {
i++;
}
if (keys[i] == k) {
return this;
}
return leaf ? nullptr : C[i]->search(k);
}
void insertNonFull(uint k)
{
int i = n - 1;
if (leaf == true) {
while (i >= 0 && keys[i] > k) {
keys[i + 1] = keys[i];
i--;
}
keys[i + 1] = k;
n = n + 1;
} else {
while (i >= 0 && keys[i] > k) {
i--;
}
if (C[i + 1]->n == 2 * B - 1) {
splitChild(i + 1, C[i + 1]);
if (keys[i + 1] < k) {
i++;
}
}
C[i + 1]->insertNonFull(k);
}
}
void splitChild(int i, BNode* y)
{
BNode* z = new BNode(y->B, y->leaf);
z->n = B - 1;
for (int j = 0; j < B - 1; j++) {
z->keys[j] = y->keys[j + B];
}
if (!y->leaf) {
for (int j = 0; j < B; j++) {
z->C[j] = y->C[j + B];
y->C[j + B] = nullptr;
}
}
y->n = B - 1;
for (int j = n; j >= i + 1; j--) {
C[j + 1] = C[j];
}
C[i + 1] = z;
for (int j = n - 1; j >= i; j--) {
keys[j + 1] = keys[j];
}
keys[i] = y->keys[B - 1];
n = n + 1;
}
friend class BTree;
};
class BTree {
private:
BNode* root = nullptr;
int B;
public:
BTree(int temp)
{
root = nullptr;
B = temp;
}
~BTree() { delete root; }
BNode const* search(uint k) const
{
return (root == nullptr) ? nullptr : root->search(k);
}
bool search_bool(uint k) const { return search(k) != nullptr; }
void insert(uint k)
{
if (!root) {
root = new BNode(B, true);
root->keys[0] = k;
root->n = 1;
} else {
if (root->n == 2 * B - 1) {
BNode* s = new BNode(B, false);
s->C[0] = root;
s->splitChild(0, root);
int i = 0;
if (s->keys[0] < k) {
i++;
}
s->C[i]->insertNonFull(k);
root = s;
} else {
root->insertNonFull(k);
}
}
}
};
int main()
{
for (int b = 8; b < 17; ++b) {
BTree tree(b);
for (int i = 0; i < 100'000; ++i)
tree.insert(rand() % 1000);
}
}
收盘中
在这里获得算法的荣誉。这并不容易。
正如您在我的 cleanup/review 中看到的那样,我加强了很多东西,主要是围绕初始化。这是一个重要的习惯。我实际上不能排除不这样做会暴露其他沉睡的虫子。
还要注意增加的常量正确性。
另外,正如其他人所说,更喜欢智能指针和现代 C++ 技术。令人惊奇的是,仅使用 std::array
、unique_ptr
等方式,错误率就会大大降低。例如,splitChild
中移动节点的错误永远不会编译,因为你 必须 明确地移动分配 unique_ptr
奖金
更现代的 C++ 示例:
- 没有任何new/delete
- 没有任何原始指针
- 不再需要手动析构函数(甚至不需要构造函数,真的)
- 编译器检查了移动语义,不错!
- 静态已知的 B 因子,因此一切的性能都提高了 10 倍,同时仍然可调
- 通用键(元素)类型,因此您现在可以存储
std::string
、double
等等 - 通用比较器,因此您可以按其他顺序对键进行排序(例如
Btree<std::string, 16, std::greater<> >
以降序而不是升序存储键)。 - 没有泄漏!
#include <array>
#include <algorithm>
#include <memory>
#include <iostream>
template <typename T, unsigned B = 16, typename Cmp = std::less<T>>
class BTree {
private:
static constexpr unsigned MaxKeys = 2 * B - 1;
static constexpr unsigned MaxChildren = 2 * B;
struct BNode;
using NodePtr = std::unique_ptr<BNode>;
struct BNode {
bool _leaf;
int _n = 0;
std::array<T, MaxKeys> _keys{};
std::array<NodePtr, MaxChildren> _children{};
BNode(bool leaf1) : _leaf(leaf1) {}
BNode const* search(T k, Cmp cmp) const
{
int i = 0;
while (i < _n && cmp(_keys[i], k)) {
i++;
}
if (_keys[i] == k) {
return this;
}
return _leaf ? nullptr : _children[i]->search(k, cmp);
}
void insertNonFull(T k, Cmp cmp)
{
int i = _n - 1;
if (_leaf == true) {
while (i >= 0 && cmp(k, _keys[i])) {
_keys[i + 1] = _keys[i];
i--;
}
_keys[i + 1] = k;
_n = _n + 1;
} else {
while (i >= 0 && cmp(k, _keys[i])) {
i--;
}
if (_children[i + 1]->_n == MaxKeys) {
splitChild(i + 1, *_children[i + 1]);
if (cmp(_keys[i + 1], k)) {
i++;
}
}
_children[i + 1]->insertNonFull(k, cmp);
}
}
void splitChild(int i, BNode& y)
{
NodePtr z = std::make_unique<BNode>(y._leaf);
z->_n = B - 1;
for (unsigned j = 0; j < B - 1; j++) {
z->_keys[j] = y._keys[j + B];
}
if (!y._leaf) {
for (unsigned j = 0; j < B; j++) {
z->_children[j] = std::move(y._children[j + B]);
}
}
y._n = B - 1;
for (int j = _n; j >= i + 1; j--) {
_children[j + 1] = std::move(_children[j]);
}
_children[i + 1] = std::move(z);
for (int j = _n - 1; j >= i; j--) {
_keys[j + 1] = _keys[j];
}
_keys[i] = y._keys[B - 1];
_n = _n + 1;
}
};
NodePtr root = nullptr;
Cmp _cmp{};
public:
BTree(Cmp cmp = {}) : _cmp(std::move(cmp)) {}
BNode const* search(T k) const {
return root ? root->search(k, _cmp) : nullptr;
}
bool search_bool(T k) const { return search(k) != nullptr; }
void insert(T k)
{
if (!root) {
root = std::make_unique<BNode>(true);
root->_keys[0] = k;
root->_n = 1;
} else {
if (root->_n == MaxKeys) {
NodePtr s = std::make_unique<BNode>(false);
s->splitChild(0, *root);
s->_children[0] = std::move(root);
int i = 0;
if (_cmp(s->_keys[0], k)) {
i++;
}
s->_children[i]->insertNonFull(k, _cmp);
root = std::move(s);
} else {
root->insertNonFull(k, _cmp);
}
}
}
};
int main()
{
using Asc = std::less<>;
using Desc = std::greater<>;
BTree<double, 8, Asc> b8;
BTree<int, 9, Desc> b9;
BTree<unsigned, 10, Asc> b10;
BTree<size_t, 11, Desc> b11;
BTree<double, 12, Asc> b12;
BTree<int, 13, Desc> b13;
BTree<unsigned, 14, Asc> b14;
BTree<size_t, 15, Desc> b15;
BTree<int, 16> b16; // default is ascending
for (int i = 0; i < 100'000; ++i) {
b8.insert(rand() % 10000);
b9.insert(rand() % 10000);
b10.insert(rand() % 10000);
b11.insert(rand() % 10000);
b12.insert(rand() % 10000);
b13.insert(rand() % 10000);
b14.insert(rand() % 10000);
b15.insert(rand() % 10000);
b16.insert(rand() % 10000);
}
{
struct NC {
int v;
bool operator==(NC const& o) const { return v == o.v; }
};
struct CMP {
bool operator()(NC const& a, NC const& b) const { return a.v < b.v; }
};
BTree<NC, 8, CMP> b8;
b8.insert({42});
bool ok = b8.search_bool({42});
}
}