如何读取此 Radix Tree 结构以确定下一个字符串的概率?
How can I read this Radix Tree structure to determine next-string probability?
在 JavaScript 中,我尝试获取给定的用户输入并猜测 3 个最有可能完成用户当前(未完成)键入的单词的单词。猜测是基于用户过去的输入。我正在处理这个 here, in this JSFiddle.
我构建的用于记录用户过去输入的结构是经过修改的 Radix Tree (AKA Patricia Trie):
输入:“hey
”
{
"h": {
"value": "h",
"count": 1,
"followables": {
"e": {
"value": "e",
"count": 1,
"followables": {
"y": {
"value": "y",
"count": 1,
"followables": {}
}
}
}
}
}
}
这个数据结构构建得非常完美,我认为这是实现所述目标的最佳结构。我的问题是读取 Radix Tree 数据以定义给定输入的 3 个最可能单词的函数。例如在上面的数据中,如果用户输入“h
”,猜测函数应该return一个这样的对象:
guess : {
1 : "hey",
2 : "",
3 : ""
}
这是我的 code/progress:
学习 - 获取完整的输入字符串并将组合组织到基数树中 (brain
):
function learn(message, brain) {
if (message.length == 0) return {}; // or do something else
var ch = message[0]; // get the first character
if (!brain[ch]) { // create new node when not exists
brain[ch] = {
value: ch,
count: 1,
followables: {}
};
} else { // increment count when exist
brain[ch].count += 1;
}
var substr = message.substring(1); // remove first character
if (substr) { // do it for the remaining substring
brain[ch].followables = learn(substr, brain[ch].followables);
} else {
renderData();
}
return brain;
}
一切就绪。不幸的是,下一个代码,旨在读取数据并猜测用户正在输入的单词,并不好。我在处理对我来说非常复杂的功能时遇到了麻烦。我把它分成小功能,据我所知这是最好的做法,但恐怕我把它弄得一团糟,可能会简单得多:
猜测 - 获取 "learned" 字符串数据并猜测用户可能正在输入的单词:
function guess(progress, brain) {
console.log("Guessing based on: " + progress);
var guesses = {
0: "",
1: "",
2: ""
}
var firstChar = progress[0];
if (brain[firstChar]) {
var step = brain[firstChar];
for (var i = 0; i < progress.length; i++) {
var char = progress[i];
if (step.followables[char]) {
step = step.followables[char];
if (i == progress.length) {
var guesses = nextStrings(step.followables);
renderGuesses(guesses);
}
} else {
renderGuesses(guesses);
}
}
} else {
renderGuesses(guesses);
}
}
function renderGuesses(guesses) {
console.log(guesses);
$('#guess-1').text(guesses[0]);
$('#guess-2').text(guesses[1]);
$('#guess-3').text(guesses[2]);
}
function nextStrings(followables) {
console.log('Searching for next string...');
var results;
if (followables.length > 0) {
results = chooseRoutes(followables);
} else {
results = {
0: "",
1: "",
2: ""
}
}
console.log(result);
return result;
}
function chooseRoutes(followables) {
var results = {
0: {
value: "",
count: 0
},
1: {
value: "",
count: 0
},
2: {
value: "",
count: 0
}
};
for (var i = 0; i < followables.length; i++) {
var count = followables[i].count;
if (count > results[0].count) {
results[0].value = followStr(followables[i], "");
} else if (count > results[1].count) {
results[1].value = followStr(followables[i], "");
} else if (count > results[2].count) {
results[2].value = followStr(followables[i], "");
}
}
console.log(results);
return results;
}
function followStr(followables, str) {
var guess = {
value: "",
count: 0
};
for (var i = 0; i < followables.length; i++) {
if (followables[i].count > guess.count) {
guess = followables[i];
}
}
followables = guess.followables;
if (guess.value != " ") {
str += guess;
followStr(followables, str);
} else {
console.log(str);
return str;
}
}
旁注 - 虽然在字典上进行模糊字符串搜索是一种更常见的方法,但学习方法是根据用户 writing/messaging 风格和支持用户的非标准词汇 ("heyy
", "sup
", ":P
", "lol
") - 这些猜测的结果可以与标准词典结果结合(并优先于)。
您用于字典的结构不正确,它应该包含对象数组。例如,在您输入这些词后:
hi
hi
hi
hi
hi
hey
hello
hella
结构应该是:
history: [{
letter: "h",
count: 8,
followables: [{
letter: "e",
count: 3,
followables: [{
letter: "y",
count: 1,
followables: []
}, {
letter: "l",
count: 2,
followables: [{
letter: "l",
count: 2,
followables: [{
letter: "o",
count: 1,
followables: []
}, {
letter: "a",
count: 1,
followables: []
}]
}]
}]
}, {
letter: "i",
count: 5,
followables: []
}]
}]
我对您创建和存储历史记录的方式(我会使用 localStorage)不感兴趣。重点是深入挖掘树内部以获得建议的递归函数。对于给定的单词,这一个获得最终 followables
:
findChildren: function (node, depth) {
/* Traverse history for current word, there is only one path */
for (k in node) {
if (node[k].letter == app.progress[depth]) {
if (depth + 1 == app.progress.length) {
/* Found it, hope it has followables */
return node[k].followables;
} else {
/* Must go deeper... */
return app.findChildren(node[k].followables, depth + 1);
};
};
};
/* No results */
return false;
}
第二个 (getWord) 创建建议:
countWordsFromNode: function (node) {
for (i in node) {
if (node[i].followables.length) {
app.countWordsFromNode(node[i].followables);
} else {
app.totalInNode++;
};
};
},
getWord: function (node, limit) {
/* First sort by count */
var sorted = node.sort(function (n1, n2) {
return n2.count - n1.count;
});
for (k in sorted) {
app.guesses[app.totalFound].word += sorted[k].letter;
if (sorted[k].followables.length) {
app.totalInNode = 0;
app.countWordsFromNode(sorted[k].followables);
for (m = 1; m < app.totalInNode; m++) {
if ((app.totalFound + m) < limit) {
app.guesses[app.totalFound + m].word += sorted[k].letter;
};
};
app.getWord(sorted[k].followables, limit);
} else {
/* End of word */
app.totalFound++;
};
if (app.totalFound >= limit) {
/* Found all suggestions */
break;
};
};
}
您可以在this Fiddle中查看详细信息,我不得不删除您的一些代码。很容易集成到任何地方,比如你可以设置其他的建议字段,目前有:
guesses: [
{
element: $('#guess-1'),
word: ''
},
{
element: $('#guess-2'),
word: ''
},
{
element: $('#guess-3'),
word: ''
}
]
编辑:
修复了向右添加字母的错误。
在 JavaScript 中,我尝试获取给定的用户输入并猜测 3 个最有可能完成用户当前(未完成)键入的单词的单词。猜测是基于用户过去的输入。我正在处理这个 here, in this JSFiddle.
我构建的用于记录用户过去输入的结构是经过修改的 Radix Tree (AKA Patricia Trie):
输入:“hey
”
{
"h": {
"value": "h",
"count": 1,
"followables": {
"e": {
"value": "e",
"count": 1,
"followables": {
"y": {
"value": "y",
"count": 1,
"followables": {}
}
}
}
}
}
}
这个数据结构构建得非常完美,我认为这是实现所述目标的最佳结构。我的问题是读取 Radix Tree 数据以定义给定输入的 3 个最可能单词的函数。例如在上面的数据中,如果用户输入“h
”,猜测函数应该return一个这样的对象:
guess : {
1 : "hey",
2 : "",
3 : ""
}
这是我的 code/progress:
学习 - 获取完整的输入字符串并将组合组织到基数树中 (brain
):
function learn(message, brain) {
if (message.length == 0) return {}; // or do something else
var ch = message[0]; // get the first character
if (!brain[ch]) { // create new node when not exists
brain[ch] = {
value: ch,
count: 1,
followables: {}
};
} else { // increment count when exist
brain[ch].count += 1;
}
var substr = message.substring(1); // remove first character
if (substr) { // do it for the remaining substring
brain[ch].followables = learn(substr, brain[ch].followables);
} else {
renderData();
}
return brain;
}
一切就绪。不幸的是,下一个代码,旨在读取数据并猜测用户正在输入的单词,并不好。我在处理对我来说非常复杂的功能时遇到了麻烦。我把它分成小功能,据我所知这是最好的做法,但恐怕我把它弄得一团糟,可能会简单得多:
猜测 - 获取 "learned" 字符串数据并猜测用户可能正在输入的单词:
function guess(progress, brain) {
console.log("Guessing based on: " + progress);
var guesses = {
0: "",
1: "",
2: ""
}
var firstChar = progress[0];
if (brain[firstChar]) {
var step = brain[firstChar];
for (var i = 0; i < progress.length; i++) {
var char = progress[i];
if (step.followables[char]) {
step = step.followables[char];
if (i == progress.length) {
var guesses = nextStrings(step.followables);
renderGuesses(guesses);
}
} else {
renderGuesses(guesses);
}
}
} else {
renderGuesses(guesses);
}
}
function renderGuesses(guesses) {
console.log(guesses);
$('#guess-1').text(guesses[0]);
$('#guess-2').text(guesses[1]);
$('#guess-3').text(guesses[2]);
}
function nextStrings(followables) {
console.log('Searching for next string...');
var results;
if (followables.length > 0) {
results = chooseRoutes(followables);
} else {
results = {
0: "",
1: "",
2: ""
}
}
console.log(result);
return result;
}
function chooseRoutes(followables) {
var results = {
0: {
value: "",
count: 0
},
1: {
value: "",
count: 0
},
2: {
value: "",
count: 0
}
};
for (var i = 0; i < followables.length; i++) {
var count = followables[i].count;
if (count > results[0].count) {
results[0].value = followStr(followables[i], "");
} else if (count > results[1].count) {
results[1].value = followStr(followables[i], "");
} else if (count > results[2].count) {
results[2].value = followStr(followables[i], "");
}
}
console.log(results);
return results;
}
function followStr(followables, str) {
var guess = {
value: "",
count: 0
};
for (var i = 0; i < followables.length; i++) {
if (followables[i].count > guess.count) {
guess = followables[i];
}
}
followables = guess.followables;
if (guess.value != " ") {
str += guess;
followStr(followables, str);
} else {
console.log(str);
return str;
}
}
旁注 - 虽然在字典上进行模糊字符串搜索是一种更常见的方法,但学习方法是根据用户 writing/messaging 风格和支持用户的非标准词汇 ("heyy
", "sup
", ":P
", "lol
") - 这些猜测的结果可以与标准词典结果结合(并优先于)。
您用于字典的结构不正确,它应该包含对象数组。例如,在您输入这些词后:
hi
hi
hi
hi
hi
hey
hello
hella
结构应该是:
history: [{
letter: "h",
count: 8,
followables: [{
letter: "e",
count: 3,
followables: [{
letter: "y",
count: 1,
followables: []
}, {
letter: "l",
count: 2,
followables: [{
letter: "l",
count: 2,
followables: [{
letter: "o",
count: 1,
followables: []
}, {
letter: "a",
count: 1,
followables: []
}]
}]
}]
}, {
letter: "i",
count: 5,
followables: []
}]
}]
我对您创建和存储历史记录的方式(我会使用 localStorage)不感兴趣。重点是深入挖掘树内部以获得建议的递归函数。对于给定的单词,这一个获得最终 followables
:
findChildren: function (node, depth) {
/* Traverse history for current word, there is only one path */
for (k in node) {
if (node[k].letter == app.progress[depth]) {
if (depth + 1 == app.progress.length) {
/* Found it, hope it has followables */
return node[k].followables;
} else {
/* Must go deeper... */
return app.findChildren(node[k].followables, depth + 1);
};
};
};
/* No results */
return false;
}
第二个 (getWord) 创建建议:
countWordsFromNode: function (node) {
for (i in node) {
if (node[i].followables.length) {
app.countWordsFromNode(node[i].followables);
} else {
app.totalInNode++;
};
};
},
getWord: function (node, limit) {
/* First sort by count */
var sorted = node.sort(function (n1, n2) {
return n2.count - n1.count;
});
for (k in sorted) {
app.guesses[app.totalFound].word += sorted[k].letter;
if (sorted[k].followables.length) {
app.totalInNode = 0;
app.countWordsFromNode(sorted[k].followables);
for (m = 1; m < app.totalInNode; m++) {
if ((app.totalFound + m) < limit) {
app.guesses[app.totalFound + m].word += sorted[k].letter;
};
};
app.getWord(sorted[k].followables, limit);
} else {
/* End of word */
app.totalFound++;
};
if (app.totalFound >= limit) {
/* Found all suggestions */
break;
};
};
}
您可以在this Fiddle中查看详细信息,我不得不删除您的一些代码。很容易集成到任何地方,比如你可以设置其他的建议字段,目前有:
guesses: [
{
element: $('#guess-1'),
word: ''
},
{
element: $('#guess-2'),
word: ''
},
{
element: $('#guess-3'),
word: ''
}
]
编辑: 修复了向右添加字母的错误。