Javascript:在前缀树中准确找到 10 个以给定前缀开头的单词
Javascript: Find exactly 10 words in a prefix tree that start with a given prefix
我有一个特里树(也叫前缀树)。给定一个前缀,我想获得以该前缀开头的 10 个单词的列表。
这个问题的独特之处在于我只需要 10 个以给定前缀开头的单词——而不是全部。鉴于此,可以进行一些优化。
我知道下面的代码工作正常。 trie 中的每个节点都有一个 children
属性 和一个 this_is_the_end_of_a_word
属性。例如,当您插入 "hi" 时,这就是特里树的样子:
.
问题:给定一个前缀,我想得到一个以前缀开头的 10 个单词的列表。
我解决这个问题的方法是:沿着前缀树向下,沿着 prefix
的字符,直到到达与 prefix
的最后一个字符对应的节点。现在您应该在此节点上执行 DFS,跟踪列表中具有 this_is_the_end_of_a_word === true
的节点。但是当列表的长度等于 10 并且 return 列表时,您应该停止搜索。
我认为我的方法是合理的,但我在实施它时遇到了问题——特别是因为我正在尝试使用递归 DFS,所以我不确定如何传递 "global"在递归调用之间列出。我知道我应该使用闭包,但我是 javascript 的新手,我不确定如何去做。下面是我已经尝试过的示例。
我的 Trie class(我知道这段代码有效,这只是为了让您了解我是如何组织我的数据结构的。)
var Trie = function() {
var that = Object.create(Trie.prototype);
that.children = {}; //mapping: next character -> child nodes
that.this_is_the_end_of_a_word = false;
that.insertWord = function(word) {
var current_node = that;
for (var i = 0; i < word.length; i++) {
var c = word[i]
//if character is not in the trie already, add it
if (!(c in current_node.children)) {
current_node.children[c] = Trie();
}
//update current_node
current_node = current_node.children[c];
};
//after adding all the chars of the word,
//you are at the end of a word
current_node.this_is_the_end_of_a_word = true;
}
that.insertWords = function(words) {
for (var i = 0; i < words.length; i++) {
that.insertWord(words[i]);
}
}
that.contains = function(word) {
//start at the root
var current_node = that;
for (var i = 0; i < word.length; i++) {
var c = word[i];
//if the word's character isn't a child of the current_node,
//the word isn't in the trie
if (!(c in current_node.children)) {
return false;
}
//move down the trie, update current_node
current_node = current_node.children[c];
};
return current_node.this_is_the_end_of_a_word;
}
Object.freeze(that);
return that;
}
我的第一种方法(有很多错误)
num_words_to_go = 10;
//this global is bad practice;
//I want to put this as the argument to a closure
//so it's passed between recursive calls
that.getWords = function(start_node, prefix) {
console.log(0);
var words = [];
//if start node is a word, add it
if (start_node.this_is_the_end_of_a_word) {
words.push(start_node);
num_words_to_go--;
}
if (num_words_to_go <= 0 || !start_node.children) {
return words;
}
return start_node.children.forEach(
currentValue.getWords(
currentValue, prefix + <character for this child>));
/*I can't think of a nice way to write this without going through all of the children.
I know I don't need to, because I only need to find 10 words and get out.
This is why I was leaning towards the recursive DFS.
*/
}
第二种方法:我还找到了一个我正在查看的 python 示例:
http://v1v3kn.tumblr.com/post/18238156967/roll-your-own-autocomplete-solution-using-tries
我尝试将他的示例翻译成 JavaScript,但 all_suffixes
.
仍然有问题
that.all_suffixes = function (prefix){
results = [];
if (that.this_is_the_end_of_a_word) results.push(prefix);
if (!(that.children)) return results;
if (results.length > 2) return results;
var callback = function(currentValue, i, array){
return currentValue.all_suffixes(prefix+array[i]);
}
arr = that.children.forEach(callback, that);
//[child.all_suffixes(prefix + char) for (char, child) in self.children.items()]
return concat(reduce(concat, arr), results);
}
that.autocomplete = function(prefix){
current_node = that;
for(var i = 0; i < prefix.length; i++){
var c = prefix[i];
//if there is nothing in the trie with this prefix
if (!(c in current_node.children)){
return [];
}
current_node = current_node.children[c];
}
return list(current_node.all_suffixes(prefix))
}
基本上我采用了您的模型并将新方法 getWords(word[, count])
应用于 Trie
class。我更改了方法 contains
,因为我也需要 getWords
中的功能。所以我创建了一个新方法 getNode
,其中 returns 找到单词或部分的节点。
方法getWords
首先查找词(部分),然后遍历数据结构。当找到一个词时,它会被推送到结果集中。如果结果集长度大于或等于所需长度,则迭代(因此 Array.prototype.some
)终止,fork
的递归调用停止。
that.getWords = function (word, count) {
function fork(n, w) {
function child(c) {
return fork(n.children[c], w + c);
}
n.isWord && words.push(w);
return words.length >= count || Object.keys(n.children).some(child);
}
var words = [],
current_node = that.getNode(word);
if (current_node) {
fork(current_node, word);
return words;
}
}
旁注:我将 this_is_the_end_of_a_word
更改为 isWord
。
输入
- 创建
Trie
的新实例。
- 插入一些测试用词。
输出
- 测试 trie 是否包含
'motor'
, returns false.
- 测试 trie 是否包含
'te'
, returns false.
- 测试 trie 是否包含
'ten'
, returns true.
- 获取所有个以
'ind'
开头的单词(可用8个,显示8个)。
- 获取以
'in'
开头的前 10 个单词(可用 16 个,显示 10 个)。
- 整个 trie。
var Trie = function () {
var that = Object.create(Trie.prototype);
that.children = {}; //mapping: next character -> child nodes
that.isWord = false;
that.insertWord = function (word) {
var current_node = that;
for (var i = 0; i < word.length; i++) {
var c = word[i]
//if character is not in the trie already, add it
if (!(c in current_node.children)) {
current_node.children[c] = Trie();
}
//update current_node
current_node = current_node.children[c];
};
//after adding all the chars of the word,
//you are at the end of a word
current_node.isWord = true;
}
that.insertWords = function (words) {
for (var i = 0; i < words.length; i++) {
that.insertWord(words[i]);
}
}
that.getNode = function (word) {
//start at the root
var current_node = that;
for (var i = 0; i < word.length; i++) {
var c = word[i];
//if the word's character isn't a child of the current_node,
//the word isn't in the trie
if (!(c in current_node.children)) {
return;
}
//move down the trie, update current_node
current_node = current_node.children[c];
};
return current_node;
}
that.contains = function (word) {
var current_node = that.getNode(word);
if (current_node) {
return current_node.isWord;
}
return false;
}
that.getWords = function (word, count) {
function fork(n, w) {
function child(c) {
return fork(n.children[c], w + c);
}
n.isWord && words.push(w);
return words.length >= count || Object.keys(n.children).some(child);
}
var words = [],
current_node = that.getNode(word);
if (current_node) {
fork(current_node, word);
return words;
}
}
// freeze does lock the isWord property, which is not required here
//Object.freeze(that);
return that;
}
var trie = new Trie();
trie.insertWords([
'car', 'cool', 'i', 'in', 'indeed', 'independence', 'india', 'indoor', 'induction',
'industrial', 'industry', 'indwell', 'inferior', 'informal', 'inhale', 'inn',
'inside', 'instance', 'intrepid', 'of', 'off', 'other', 'tea', 'ted', 'ten',
'to', 'zoo', 'zoom'
]);
document.write(trie.contains('motor') + '<br>'); // false
document.write(trie.contains('te') + '<br>'); // false
document.write(trie.contains('ten') + '<br>'); // true
document.write('<pre>' + JSON.stringify(trie.getWords('ind'), 0, 4) + '</pre>');
document.write('<pre>' + JSON.stringify(trie.getWords('in', 10), 0, 4) + '</pre>');
document.write('<pre>' + JSON.stringify(trie, 0, 4) + '</pre>');
我有一个特里树(也叫前缀树)。给定一个前缀,我想获得以该前缀开头的 10 个单词的列表。
这个问题的独特之处在于我只需要 10 个以给定前缀开头的单词——而不是全部。鉴于此,可以进行一些优化。
我知道下面的代码工作正常。 trie 中的每个节点都有一个 children
属性 和一个 this_is_the_end_of_a_word
属性。例如,当您插入 "hi" 时,这就是特里树的样子:
问题:给定一个前缀,我想得到一个以前缀开头的 10 个单词的列表。
我解决这个问题的方法是:沿着前缀树向下,沿着 prefix
的字符,直到到达与 prefix
的最后一个字符对应的节点。现在您应该在此节点上执行 DFS,跟踪列表中具有 this_is_the_end_of_a_word === true
的节点。但是当列表的长度等于 10 并且 return 列表时,您应该停止搜索。
我认为我的方法是合理的,但我在实施它时遇到了问题——特别是因为我正在尝试使用递归 DFS,所以我不确定如何传递 "global"在递归调用之间列出。我知道我应该使用闭包,但我是 javascript 的新手,我不确定如何去做。下面是我已经尝试过的示例。
我的 Trie class(我知道这段代码有效,这只是为了让您了解我是如何组织我的数据结构的。)
var Trie = function() {
var that = Object.create(Trie.prototype);
that.children = {}; //mapping: next character -> child nodes
that.this_is_the_end_of_a_word = false;
that.insertWord = function(word) {
var current_node = that;
for (var i = 0; i < word.length; i++) {
var c = word[i]
//if character is not in the trie already, add it
if (!(c in current_node.children)) {
current_node.children[c] = Trie();
}
//update current_node
current_node = current_node.children[c];
};
//after adding all the chars of the word,
//you are at the end of a word
current_node.this_is_the_end_of_a_word = true;
}
that.insertWords = function(words) {
for (var i = 0; i < words.length; i++) {
that.insertWord(words[i]);
}
}
that.contains = function(word) {
//start at the root
var current_node = that;
for (var i = 0; i < word.length; i++) {
var c = word[i];
//if the word's character isn't a child of the current_node,
//the word isn't in the trie
if (!(c in current_node.children)) {
return false;
}
//move down the trie, update current_node
current_node = current_node.children[c];
};
return current_node.this_is_the_end_of_a_word;
}
Object.freeze(that);
return that;
}
我的第一种方法(有很多错误)
num_words_to_go = 10;
//this global is bad practice;
//I want to put this as the argument to a closure
//so it's passed between recursive calls
that.getWords = function(start_node, prefix) {
console.log(0);
var words = [];
//if start node is a word, add it
if (start_node.this_is_the_end_of_a_word) {
words.push(start_node);
num_words_to_go--;
}
if (num_words_to_go <= 0 || !start_node.children) {
return words;
}
return start_node.children.forEach(
currentValue.getWords(
currentValue, prefix + <character for this child>));
/*I can't think of a nice way to write this without going through all of the children.
I know I don't need to, because I only need to find 10 words and get out.
This is why I was leaning towards the recursive DFS.
*/
}
第二种方法:我还找到了一个我正在查看的 python 示例:
http://v1v3kn.tumblr.com/post/18238156967/roll-your-own-autocomplete-solution-using-tries
我尝试将他的示例翻译成 JavaScript,但 all_suffixes
.
that.all_suffixes = function (prefix){
results = [];
if (that.this_is_the_end_of_a_word) results.push(prefix);
if (!(that.children)) return results;
if (results.length > 2) return results;
var callback = function(currentValue, i, array){
return currentValue.all_suffixes(prefix+array[i]);
}
arr = that.children.forEach(callback, that);
//[child.all_suffixes(prefix + char) for (char, child) in self.children.items()]
return concat(reduce(concat, arr), results);
}
that.autocomplete = function(prefix){
current_node = that;
for(var i = 0; i < prefix.length; i++){
var c = prefix[i];
//if there is nothing in the trie with this prefix
if (!(c in current_node.children)){
return [];
}
current_node = current_node.children[c];
}
return list(current_node.all_suffixes(prefix))
}
基本上我采用了您的模型并将新方法 getWords(word[, count])
应用于 Trie
class。我更改了方法 contains
,因为我也需要 getWords
中的功能。所以我创建了一个新方法 getNode
,其中 returns 找到单词或部分的节点。
方法getWords
首先查找词(部分),然后遍历数据结构。当找到一个词时,它会被推送到结果集中。如果结果集长度大于或等于所需长度,则迭代(因此 Array.prototype.some
)终止,fork
的递归调用停止。
that.getWords = function (word, count) {
function fork(n, w) {
function child(c) {
return fork(n.children[c], w + c);
}
n.isWord && words.push(w);
return words.length >= count || Object.keys(n.children).some(child);
}
var words = [],
current_node = that.getNode(word);
if (current_node) {
fork(current_node, word);
return words;
}
}
旁注:我将 this_is_the_end_of_a_word
更改为 isWord
。
输入
- 创建
Trie
的新实例。 - 插入一些测试用词。
输出
- 测试 trie 是否包含
'motor'
, returns false. - 测试 trie 是否包含
'te'
, returns false. - 测试 trie 是否包含
'ten'
, returns true. - 获取所有个以
'ind'
开头的单词(可用8个,显示8个)。 - 获取以
'in'
开头的前 10 个单词(可用 16 个,显示 10 个)。 - 整个 trie。
var Trie = function () {
var that = Object.create(Trie.prototype);
that.children = {}; //mapping: next character -> child nodes
that.isWord = false;
that.insertWord = function (word) {
var current_node = that;
for (var i = 0; i < word.length; i++) {
var c = word[i]
//if character is not in the trie already, add it
if (!(c in current_node.children)) {
current_node.children[c] = Trie();
}
//update current_node
current_node = current_node.children[c];
};
//after adding all the chars of the word,
//you are at the end of a word
current_node.isWord = true;
}
that.insertWords = function (words) {
for (var i = 0; i < words.length; i++) {
that.insertWord(words[i]);
}
}
that.getNode = function (word) {
//start at the root
var current_node = that;
for (var i = 0; i < word.length; i++) {
var c = word[i];
//if the word's character isn't a child of the current_node,
//the word isn't in the trie
if (!(c in current_node.children)) {
return;
}
//move down the trie, update current_node
current_node = current_node.children[c];
};
return current_node;
}
that.contains = function (word) {
var current_node = that.getNode(word);
if (current_node) {
return current_node.isWord;
}
return false;
}
that.getWords = function (word, count) {
function fork(n, w) {
function child(c) {
return fork(n.children[c], w + c);
}
n.isWord && words.push(w);
return words.length >= count || Object.keys(n.children).some(child);
}
var words = [],
current_node = that.getNode(word);
if (current_node) {
fork(current_node, word);
return words;
}
}
// freeze does lock the isWord property, which is not required here
//Object.freeze(that);
return that;
}
var trie = new Trie();
trie.insertWords([
'car', 'cool', 'i', 'in', 'indeed', 'independence', 'india', 'indoor', 'induction',
'industrial', 'industry', 'indwell', 'inferior', 'informal', 'inhale', 'inn',
'inside', 'instance', 'intrepid', 'of', 'off', 'other', 'tea', 'ted', 'ten',
'to', 'zoo', 'zoom'
]);
document.write(trie.contains('motor') + '<br>'); // false
document.write(trie.contains('te') + '<br>'); // false
document.write(trie.contains('ten') + '<br>'); // true
document.write('<pre>' + JSON.stringify(trie.getWords('ind'), 0, 4) + '</pre>');
document.write('<pre>' + JSON.stringify(trie.getWords('in', 10), 0, 4) + '</pre>');
document.write('<pre>' + JSON.stringify(trie, 0, 4) + '</pre>');