使用 Damerau-Levenshtein 距离比较 code.org 中的文本集

using Damerau-Levenshtein distance to compare sets of text in code.org

对编码不是很了解,我通常使用块编码而不是打字。

我使用了很多我在网上找到的不同的 Levenshtein 距离代码,但大多数都因为某种原因而不起作用

var levDist = function (s, t) {
    var d = []; //2d matrix

    // Step 1
    var n = s.length;
    var m = t.length;

    if (n == 0) return m;
    if (m == 0) return n;

    //Create an array of arrays in javascript (a descending loop is quicker)
    for (var i = n; i >= 0; i--) d[i] = [];

    // Step 2
    for (i = n; i >= 0; i--) d[i][0] = i;
    for (var j = m; j >= 0; j--) d[0][j] = j;

    // Step 3
    for (i = 1; i <= n; i++) {
        var s_i = s.charAt(i - 1);

        // Step 4
        for (j = 1; j <= m; j++) {

            //Check the jagged ld total so far
            if (i == j && d[i][j] > 4) return n;

            var t_j = t.charAt(j - 1);
            var cost = (s_i == t_j) ? 0 : 1; // Step 5

            //Calculate the minimum
            var mi = d[i - 1][j] + 1;
            var b = d[i][j - 1] + 1;
            var c = d[i - 1][j - 1] + cost;

            if (b < mi) mi = b;
            if (c < mi) mi = c;

            d[i][j] = mi; // Step 6

            //Damerau transposition
            if (i > 1 && j > 1 && s_i == t.charAt(j - 2) && s.charAt(i - 2) == t_j) {
                d[i][j] = Math.min(d[i][j], d[i - 2][j - 2] + cost);
            }
        }
    }


    // Step 7
    return d[n][m];
};

这是我写的所有代码(包括最近尝试获取编辑距离)

var levDist = function (s, t) {
    var d = []; //2d matrix

    // Step 1
    var n = s.length;
    var m = t.length;

    if (n == 0) return m;
    if (m == 0) return n;

    //Create an array of arrays in javascript (a descending loop is quicker)
    for (var i = n; i >= 0; i--) d[i] = [];

    // Step 2
    for (i = n; i >= 0; i--) d[i][0] = i;
    for (var j = m; j >= 0; j--) d[0][j] = j;

    // Step 3
    for (i = 1; i <= n; i++) {
        var s_i = s.charAt(i - 1);

        // Step 4
        for (j = 1; j <= m; j++) {

            //Check the jagged ld total so far
            if (i == j && d[i][j] > 4) return n;

            var t_j = t.charAt(j - 1);
            var cost = (s_i == t_j) ? 0 : 1; // Step 5

            //Calculate the minimum
            var mi = d[i - 1][j] + 1;
            var b = d[i][j - 1] + 1;
            var c = d[i - 1][j - 1] + cost;

            if (b < mi) mi = b;
            if (c < mi) mi = c;

            d[i][j] = mi; // Step 6

            //Damerau transposition
            if (i > 1 && j > 1 && s_i == t.charAt(j - 2) && s.charAt(i - 2) == t_j) {
                d[i][j] = Math.min(d[i][j], d[i - 2][j - 2] + cost);
            }
        }
    }


    // Step 7
    return d[n][m];
};
var S = "Hello World";
var grossWPM;
var Transparency = 1;
var Timer = 60;
var InitialTime = Timer;
var Texts = getColumn("Texts", "Texts");
var TextLength = getColumn("Texts", "Number of Characters");
var Title = getColumn("Texts", "Titles");
var Author = getColumn("Texts", "Authors");
var TextSelector = randomNumber(0, 19);
console.log("Article #" + (TextSelector + 1));
console.log(TextLength[TextSelector] + " Characters in total");
console.log(Title[TextSelector]);
console.log("By: " + Author[TextSelector]);
var Countdown;
var Countdown = 6;
//Texts are obtained from
//https://data.typeracer.com/pit/texts


onEvent("button1", "click", function( ) {
  timedLoop(1000, function() {
    Countdown = Countdown - 1;
    setText("button1", Countdown - 0);
    timedLoop(100, function() {
      setText("text_area2", "");
    });
    if (Countdown <= 1) {
      stopTimedLoop();
      setTimeout(function() {
        setText("button1", "GO!");
        setText("text_area1", Texts[TextSelector]);
        if (getText("button1") == "GO!") {
          var TransparentLoop = timedLoop(100, function() {
            Transparency = Transparency - 0.1;
            setProperty("Warning", "text-color", rgb(77,87,95, Transparency));
            if (Transparency <= 0) {
              deleteElement("Warning");
              showElement("label2");
              stopTimedLoop(TransparentLoop);
            }
          });
          var TimerLoop = timedLoop(1000, function() {
            Timer = Timer - 1;
            setText("label2", Timer);
            if (Timer <= 0) {
              grossWPM = (TextLength[TextSelector] / 5) / ((InitialTime - Timer) / 60);
              console.log(grossWPM);
              setScreen("screen2");
              if (Timer == 1) {
                S = " second";
              } else {
                S = " seconds";
              }
              setText("label1", "Your typing speed was approximately " + (Math.round(grossWPM) + (" WPM* with " + (Timer + (S + " left")))));
              stopTimedLoop(TimerLoop);
            }
          });
          console.log("Timer Started");
          timedLoop(10, function() {
            var str = getText("text_area2");
            
            if (str.length == TextLength[TextSelector]) {
              stopTimedLoop(TimerLoop);
              grossWPM = (TextLength[TextSelector] / 5) / ((InitialTime - Timer) / 60);
              setScreen("screen2");
              levDist(str, Texts[TextSelector]);
              if (Timer == 1) {
                S = " second";
              } else {
                S = " seconds";
              }
              setText("label1", "Your typing speed was approximately " + (Math.round(grossWPM) + (" WPM* with " + (Timer + (S + " left")))));
              if (grossWPM == 69) {
                setText("label4", "Nice");
              }
              stopTimedLoop();
            }
          });
        }
      }, 1000);
    }
  });
});

显然不太擅长这个所以有人可以帮忙吗?

我要比较两组文字

  1. 用户输入的内容。
  2. 用户应该输入的段落。 这是针对 WPM 测试的,我想要一种方法来测量 WPM,其中包括用户在键入时所犯的错误。

如果除了 Levenshtein 距离之外还有其他方法可以检查这个,请告诉我,我只是查找了一种方法,而 Levenshtein 距离似乎是这样做的方法

code.org给出的错误说:

ERROR: Line: 50: TypeError: d[n] is undefined

我解决了这个问题,我使用了这个代码

function levenshtein(s1, s2) {

  if (s1 == s2) {
    return 0;
  }

  var s1_len = s1.length;
  var s2_len = s2.length;
  if (s1_len === 0) {
    return s2_len;
  }
  if (s2_len === 0) {
    return s1_len;
  }

  // BEGIN STATIC
  var split = false;
  try {
    split = !('0')[0];
  } catch (e) {
    // Earlier IE may not support access by string index
    split = true;
  }
  // END STATIC
  if (split) {
    s1 = s1.split('');
    s2 = s2.split('');
  }

  var v0 = new Array(s1_len + 1);
  var v1 = new Array(s1_len + 1);

  var s1_idx = 0,
    s2_idx = 0,
    cost = 0;
  for (s1_idx = 0; s1_idx < s1_len + 1; s1_idx++) {
    v0[s1_idx] = s1_idx;
  }
  var char_s1 = '',
    char_s2 = '';
  for (s2_idx = 1; s2_idx <= s2_len; s2_idx++) {
    v1[0] = s2_idx;
    char_s2 = s2[s2_idx - 1];

    for (s1_idx = 0; s1_idx < s1_len; s1_idx++) {
      char_s1 = s1[s1_idx];
      cost = (char_s1 == char_s2) ? 0 : 1;
      var m_min = v0[s1_idx + 1] + 1;
      var b = v1[s1_idx] + 1;
      var c = v0[s1_idx] + cost;
      if (b < m_min) {
        m_min = b;
      }
      if (c < m_min) {
        m_min = c;
      }
      v1[s1_idx + 1] = m_min;
    }
    var v_tmp = v0;
    v0 = v1;
    v1 = v_tmp;
  }
  return v0[s1_len];
}

我从 this 问题

中得到了代码

这是编辑距离而非 damerau-levenshtein 距离