使用 Trie 自动完成
Autocomplete using Trie
我正在尝试用 C++ 实现某种自动完成功能。首先使用 Trie,一旦成功(最重要的是,我知道它是如何工作的),我将尝试使用三元树。但就目前而言,当我添加以与 Trie 中已有字符不同的字符开头的单词时,我会遇到分段错误。
例如。我们加上"abc"、"abcd"和"abcde"这没问题。稍后当我想添加时(虽然 "abc" 等仍在 Trie 中)"xfce"、"xfced" 发生分段错误。
我已经调试了一段时间了,但似乎找不到问题所在。
我认为问题出在 Trie.cpp 的某处,所以我将在此处提供该文件。然而它也可能在主要功能中,但我不想因为发布太多代码而被大吼大叫...
#include "Trie.h"
#include <iostream>
Trie::Trie()
{
this->root = new Node(false);
}
Trie::~Trie()
{
}
Trie::Node::Node(bool isLeaf)
{
this->isLeaf = isLeaf;
}
void Trie::insert(const std::string& word)
{
Node* crawler = this->root;
int index;
for(int i = 0; i < word.length(); ++i)
{
index = CHAR_TO_INDEX(word.at(i));
if(!crawler->children[index])
{
crawler->children[index] = new Node(false);
}
crawler = crawler->children[index];
}
crawler->isLeaf = true;
}
int Trie::contains(const std::string& word)
{
int index;
Node* crawler = this->root;
for(int i = 0; i < word.length(); ++i)
{
index = CHAR_TO_INDEX(word.at(i));
if(!crawler->children[index])
{
return -1;
}
crawler = crawler->children[index];
}
return (crawler != NULL && crawler->isLeaf);
}
std::vector<std::string> Trie::possibleSuffixes(std::string& prefix)
{
Node* crawler = this->root;
int index;
std::vector<std::string> result;
for(int i = 0; i < prefix.length(); ++i)
{
index = CHAR_TO_INDEX(prefix.at(i));
crawler = crawler->children[index];
}
traverse(prefix, crawler, result);
return result;
}
void Trie::traverse(std::string prefix, Node* node, std::vector<std::string>& v)
{
if(node->isLeaf)
{
v.push_back(prefix);
}
for(int i = 0; i < ALPHABET; ++i)
{
if(node->children[i])
{
traverse(prefix + (char)('a' + i), node->children[i], v);
}
}
}
整个 Trie class:
#ifndef TRIE_H
#define TRIE_H
#include <string>
#include <vector>
#define ARRAYSIZE(a) sizeof(a / sizeof(a[0]))
#define ALPHABET 26
#define CHAR_TO_INDEX(c) ((int)c - (int)'a')
class Trie
{
private:
struct Node
{
Node(bool isLeaf);
struct Node *children[ALPHABET];
bool isLeaf;
};
Node *root;
void traverse(std::string prefix, Node* node, std::vector<std::string>& v);
public:
Trie();
~Trie();
int contains(const std::string& word); //Checks the existance of a specific word in the trie
void insert(const std::string& word); //Inserts new word in the trie if not already there
std::vector<std::string> possibleSuffixes(std::string& prefix);
};
虽然你没有提到你的 Node
class,但我假设是这样的 -
class Node {
public:
bool isLeaf;
// must be >= 25 as you're inserting lowercase letters
// assuming your CHAR_TO_INDEX(ch) returns 0 based index
// e.g. 'a' => 0, 'b' => 1 ... 'z' => 25
Node* children[30];
// default constructor should be like this
Node(): isLeaf(false) {
for(int i = 0; i < 26; i++) {
children[i] = NULL;
}
}
~Node() {
for(int i = 0; i < 26; i++) {
if(children[i]) {
delete children[i];
children[i] = NULL;
}
}
delete this;
}
};
请比较一下你的Node
class/struct是不是这样的。
我正在尝试用 C++ 实现某种自动完成功能。首先使用 Trie,一旦成功(最重要的是,我知道它是如何工作的),我将尝试使用三元树。但就目前而言,当我添加以与 Trie 中已有字符不同的字符开头的单词时,我会遇到分段错误。
例如。我们加上"abc"、"abcd"和"abcde"这没问题。稍后当我想添加时(虽然 "abc" 等仍在 Trie 中)"xfce"、"xfced" 发生分段错误。
我已经调试了一段时间了,但似乎找不到问题所在。
我认为问题出在 Trie.cpp 的某处,所以我将在此处提供该文件。然而它也可能在主要功能中,但我不想因为发布太多代码而被大吼大叫...
#include "Trie.h"
#include <iostream>
Trie::Trie()
{
this->root = new Node(false);
}
Trie::~Trie()
{
}
Trie::Node::Node(bool isLeaf)
{
this->isLeaf = isLeaf;
}
void Trie::insert(const std::string& word)
{
Node* crawler = this->root;
int index;
for(int i = 0; i < word.length(); ++i)
{
index = CHAR_TO_INDEX(word.at(i));
if(!crawler->children[index])
{
crawler->children[index] = new Node(false);
}
crawler = crawler->children[index];
}
crawler->isLeaf = true;
}
int Trie::contains(const std::string& word)
{
int index;
Node* crawler = this->root;
for(int i = 0; i < word.length(); ++i)
{
index = CHAR_TO_INDEX(word.at(i));
if(!crawler->children[index])
{
return -1;
}
crawler = crawler->children[index];
}
return (crawler != NULL && crawler->isLeaf);
}
std::vector<std::string> Trie::possibleSuffixes(std::string& prefix)
{
Node* crawler = this->root;
int index;
std::vector<std::string> result;
for(int i = 0; i < prefix.length(); ++i)
{
index = CHAR_TO_INDEX(prefix.at(i));
crawler = crawler->children[index];
}
traverse(prefix, crawler, result);
return result;
}
void Trie::traverse(std::string prefix, Node* node, std::vector<std::string>& v)
{
if(node->isLeaf)
{
v.push_back(prefix);
}
for(int i = 0; i < ALPHABET; ++i)
{
if(node->children[i])
{
traverse(prefix + (char)('a' + i), node->children[i], v);
}
}
}
整个 Trie class:
#ifndef TRIE_H
#define TRIE_H
#include <string>
#include <vector>
#define ARRAYSIZE(a) sizeof(a / sizeof(a[0]))
#define ALPHABET 26
#define CHAR_TO_INDEX(c) ((int)c - (int)'a')
class Trie
{
private:
struct Node
{
Node(bool isLeaf);
struct Node *children[ALPHABET];
bool isLeaf;
};
Node *root;
void traverse(std::string prefix, Node* node, std::vector<std::string>& v);
public:
Trie();
~Trie();
int contains(const std::string& word); //Checks the existance of a specific word in the trie
void insert(const std::string& word); //Inserts new word in the trie if not already there
std::vector<std::string> possibleSuffixes(std::string& prefix);
};
虽然你没有提到你的 Node
class,但我假设是这样的 -
class Node {
public:
bool isLeaf;
// must be >= 25 as you're inserting lowercase letters
// assuming your CHAR_TO_INDEX(ch) returns 0 based index
// e.g. 'a' => 0, 'b' => 1 ... 'z' => 25
Node* children[30];
// default constructor should be like this
Node(): isLeaf(false) {
for(int i = 0; i < 26; i++) {
children[i] = NULL;
}
}
~Node() {
for(int i = 0; i < 26; i++) {
if(children[i]) {
delete children[i];
children[i] = NULL;
}
}
delete this;
}
};
请比较一下你的Node
class/struct是不是这样的。