如何通过 Levenshtein 算法使用动态规划(in Javascript)
How to use dynamic programming through Levenshtein algorithm (in Javascript)
我试图通过 Levenshtein 算法来理解动态规划,但我已经坚持了几个小时了。我知道我对以下问题的尝试是 'brute force' 一个。我将如何使用 "dynamic programming" 来改变我的方法?我迷路了....
Problem: Given two strings, s and t, with lengths of n and m, create a
function that returns one of the following strings: "insert C" if
string t can be obtained from s by inserting character C "delete C"
(same logic as above) "swap c d" if string t can be obtained from
string s by swapping two adjacent characters (c and d) which appear in
that order in the original string. "Nothing" if no operation is
needed "impossible" if none of the above works ie LevenShtein distance is greater than 1.
这是我的蛮力尝试。 "tuple" 变量命名错误,因为我最初想将索引和值推送到矩阵但卡住了。
function levenshtein(str1, str2) {
var m = str1.length,
n = str2.length,
d = [],
i, j,
vals = [],
vals2 = [];
for (i = 0; i <= m ; i++) {
var tuple = [str1[i]];
//console.log(tuple);
// console.log(tuple);
d[i] = [i];
// console.log(str1[i]);
vals.push(tuple);
}
vals = [].concat.apply([], vals);
vals = vals.filter(function(n){ return n; });
console.log(vals);
for (j = 0; j <= n; j++) {
d[0][j] = j;
var tuple2 = [str2[j]];
// console.log(tuple2);
vals2.push(tuple2);
// console.log(vals2);
}
vals2 = [].concat.apply([], vals2);
vals2 = vals2.filter(function(n){ return n ;});
console.log(vals2);
for (j = 1; j <= n; j++) {
for (i = 1; i <= m; i++) {
if (str1[i - 1] == str2[j - 1]) d[i][j] = d[i - 1][j - 1];
else d[i][j] = Math.min(d[i - 1][j], d[i][j - 1], d[i - 1][j - 1]) + 1;
}
}
var val = d[m][n];
// console.log(d);
if(val > 1){
return "IMPOSSIBLE";
}
if(val === 0){
return "NOTHING";
}
//console.log(d);
if(val === 1){
//find the missing element between the vals
//return "INSERT " + missing element
//find the extra element
//return "DELETE + " extra element
//find the out of place element and swap with another
}
}
console.log(levenshtein("kitten", "mitten"));
// console.log(levenshtein("stop", "tops"));
// console.log(levenshtein("blahblah", "blahblah"));
无法使用动态规划优化所描述的问题,因为它只涉及单个决策,而不是一系列决策。
请注意,该问题明确指出,当 Levenshtein 距离大于 1 时,您应该 return "impossible",即不能通过单个操作使字符串相等。如果您想应用动态规划,您需要搜索 零个或多个 操作的序列,这些操作 累积地 会产生最佳解决方案。 (这就是 the dynamic programming wikipedia article 在说您需要 "optimal substructure" 和 "overlapping subproblems" 时所说的内容编程适用。)
如果您将问题更改为计算两个字符串之间的完整编辑距离,那么您可以使用动态规划进行优化,因为您可以重用选择在字符串中的特定位置执行某些操作的结果,以减少搜索的复杂性。
对于给定的问题,您当前的解决方案看起来有点过于复杂。下面有一个更简单的方法你可以研究一下。该解决方案利用了这样一个事实,即您知道您最多只能执行一个操作,并且您可以根据两个字符串的长度差异推断要尝试执行哪个操作。我们还知道,只有在两个字符串不同的地方尝试给定的操作才有意义,而不是在每个位置都尝试。
function lev(s, t) {
// Strings are equal
if (s == t) return "nothing"
// Find difference in string lengths
var delta = s.length - t.length
// Explode strings into arrays
var arrS = s.split("")
var arrT = t.split("")
// Try swapping
if (delta == 0) {
for (var i=0; i<s.length; i++) {
if (arrS[i] != arrT[i]) {
var tmp = arrS[i]
arrS[i] = arrS[i+1]
arrS[i+1] = tmp
if (arrS.join("") == t) {
return "swap " + arrS[i+1] + " " + arrS[i]
}
else break
}
}
}
// Try deleting
else if (delta == 1) {
for (var i=0; i<s.length; i++) {
if (arrS[i] != arrT[i]) {
var c = arrS.splice(i, 1)[0]
if (arrS.join("") == t) {
return "delete " + c
}
else break
}
}
}
// Try inserting
else if (delta == -1) {
for (var i=0; i<t.length; i++) {
if (arrS[i] != arrT[i]) {
arrS.splice(i, 0, arrT[i])
if (arrS.join("") == t) {
return "insert " + arrS[i]
}
else break
}
}
}
// Strings are too different
return "impossible"
}
// output helper
function out(msg) { $("body").append($("<div/>").text(msg)) }
// tests
out(lev("kitten", "mitten"))
out(lev("kitten", "kitten"))
out(lev("kitten", "kitetn"))
out(lev("kiten", "kitten"))
out(lev("kitten", "kittn"))
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
我试图通过 Levenshtein 算法来理解动态规划,但我已经坚持了几个小时了。我知道我对以下问题的尝试是 'brute force' 一个。我将如何使用 "dynamic programming" 来改变我的方法?我迷路了....
Problem: Given two strings, s and t, with lengths of n and m, create a function that returns one of the following strings: "insert C" if string t can be obtained from s by inserting character C "delete C" (same logic as above) "swap c d" if string t can be obtained from string s by swapping two adjacent characters (c and d) which appear in that order in the original string. "Nothing" if no operation is needed "impossible" if none of the above works ie LevenShtein distance is greater than 1.
这是我的蛮力尝试。 "tuple" 变量命名错误,因为我最初想将索引和值推送到矩阵但卡住了。
function levenshtein(str1, str2) {
var m = str1.length,
n = str2.length,
d = [],
i, j,
vals = [],
vals2 = [];
for (i = 0; i <= m ; i++) {
var tuple = [str1[i]];
//console.log(tuple);
// console.log(tuple);
d[i] = [i];
// console.log(str1[i]);
vals.push(tuple);
}
vals = [].concat.apply([], vals);
vals = vals.filter(function(n){ return n; });
console.log(vals);
for (j = 0; j <= n; j++) {
d[0][j] = j;
var tuple2 = [str2[j]];
// console.log(tuple2);
vals2.push(tuple2);
// console.log(vals2);
}
vals2 = [].concat.apply([], vals2);
vals2 = vals2.filter(function(n){ return n ;});
console.log(vals2);
for (j = 1; j <= n; j++) {
for (i = 1; i <= m; i++) {
if (str1[i - 1] == str2[j - 1]) d[i][j] = d[i - 1][j - 1];
else d[i][j] = Math.min(d[i - 1][j], d[i][j - 1], d[i - 1][j - 1]) + 1;
}
}
var val = d[m][n];
// console.log(d);
if(val > 1){
return "IMPOSSIBLE";
}
if(val === 0){
return "NOTHING";
}
//console.log(d);
if(val === 1){
//find the missing element between the vals
//return "INSERT " + missing element
//find the extra element
//return "DELETE + " extra element
//find the out of place element and swap with another
}
}
console.log(levenshtein("kitten", "mitten"));
// console.log(levenshtein("stop", "tops"));
// console.log(levenshtein("blahblah", "blahblah"));
无法使用动态规划优化所描述的问题,因为它只涉及单个决策,而不是一系列决策。
请注意,该问题明确指出,当 Levenshtein 距离大于 1 时,您应该 return "impossible",即不能通过单个操作使字符串相等。如果您想应用动态规划,您需要搜索 零个或多个 操作的序列,这些操作 累积地 会产生最佳解决方案。 (这就是 the dynamic programming wikipedia article 在说您需要 "optimal substructure" 和 "overlapping subproblems" 时所说的内容编程适用。)
如果您将问题更改为计算两个字符串之间的完整编辑距离,那么您可以使用动态规划进行优化,因为您可以重用选择在字符串中的特定位置执行某些操作的结果,以减少搜索的复杂性。
对于给定的问题,您当前的解决方案看起来有点过于复杂。下面有一个更简单的方法你可以研究一下。该解决方案利用了这样一个事实,即您知道您最多只能执行一个操作,并且您可以根据两个字符串的长度差异推断要尝试执行哪个操作。我们还知道,只有在两个字符串不同的地方尝试给定的操作才有意义,而不是在每个位置都尝试。
function lev(s, t) {
// Strings are equal
if (s == t) return "nothing"
// Find difference in string lengths
var delta = s.length - t.length
// Explode strings into arrays
var arrS = s.split("")
var arrT = t.split("")
// Try swapping
if (delta == 0) {
for (var i=0; i<s.length; i++) {
if (arrS[i] != arrT[i]) {
var tmp = arrS[i]
arrS[i] = arrS[i+1]
arrS[i+1] = tmp
if (arrS.join("") == t) {
return "swap " + arrS[i+1] + " " + arrS[i]
}
else break
}
}
}
// Try deleting
else if (delta == 1) {
for (var i=0; i<s.length; i++) {
if (arrS[i] != arrT[i]) {
var c = arrS.splice(i, 1)[0]
if (arrS.join("") == t) {
return "delete " + c
}
else break
}
}
}
// Try inserting
else if (delta == -1) {
for (var i=0; i<t.length; i++) {
if (arrS[i] != arrT[i]) {
arrS.splice(i, 0, arrT[i])
if (arrS.join("") == t) {
return "insert " + arrS[i]
}
else break
}
}
}
// Strings are too different
return "impossible"
}
// output helper
function out(msg) { $("body").append($("<div/>").text(msg)) }
// tests
out(lev("kitten", "mitten"))
out(lev("kitten", "kitten"))
out(lev("kitten", "kitetn"))
out(lev("kiten", "kitten"))
out(lev("kitten", "kittn"))
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>