Nearley语法在特定条件下多次识别相同的非终端符号

Question

给定以下 nearley 代码：

   @builtin "whitespace.ne"

   @{%

   let numberedParams = {
       3: 45
   };

   const lexer = require("moo").compile({
       comment: /\(.*?\)/,
       expstart: /\[/,
       expend: /\]/,
       paramstart: '#',
       equals: "=",
       operator: /\*\*|\+|\-|\*|\/|OR|XOR|AND|MOD|EQ|NE|GT|GE|LT|LE/,
       function: ['ATAN','ABS','ACOS','ASIN','COS','EXP','FIX','FUP','ROUND','LN','SIN','SQRT','TAN','EXISTS'],
       linenumber_command: 'N',
       command: /[ABCDFGHIJKLMPQRSTUVWXYZ]/,
       float: /[0-9]*\.[0-9]+/,
       int: /[0-9]+/,
       ws: /[ \t]+/,
       EOL: {match: /(?:\r\n?|\n)/, lineBreaks: true }
   });

   // Converts from degrees to radians.
   Math.radians = function(degrees) {
     return degrees * Math.PI / 180;
   };

   // Converts from radians to degrees.
   Math.degrees = function(radians) {
     return radians * 180 / Math.PI;
   };

   function empty(d) {
       return null;
   }

   function logid(prefix) {
       return function (d) {
           console.log(prefix, d);
           return d;
       };
   }

   // Appends to list
   function append(d) {
       if (Array.isArray(d[0])) {
           return d[0].concat(d[2]);
       }
       return [d[0], d[2]];
   }

   function getparam(d) {
       return numberedParams[String(d[1])] | 0.0;
   }

   // Returns the result of one parsed line
   function processLine(d) {

       logid("processLine")(d);

       if (d[0] != null) {
           return [];
       }

       if (d[2] != null) {
           return [d[2]].concat(d[4]);
       }

       if (! Array.isArray(d[4])) {
           return [d[4]];
       }

       return d[4];
   }


   %}

   @lexer lexer

   line ->
       block_delete:? _ linenumber:? _ line_items EOL {% processLine %}

   block_delete ->
       "/" {% id %}

   linenumber ->
       %linenumber_command int_or_float {% function (d) { return {command: d[0].value, value: d[1]}; } %}

   line_items ->
       line_item {% id %}
       | line_items _ line_item {% append %}

   line_item ->
       comment {% id %}
       | parameter_setting {% id %}
       | word {% id %}

   word ->
       %command _ number {% function (d) { return {command: d[0].value, value: d[2]}; } %}

   parameter_start ->
       %paramstart {% id %}

   parameter_setting ->
       parameter_start parameter_index _ %equals _ number {% function (d) { return {command: d[0].value + d[1], value: d[5]}; }  %}

   comment ->
       %comment {% function (d) { return {command: 'COMMENT', value: d[0].value}; } %}

   gcode_expression ->
       "[" _ expression _ "]" {% (d) => d[2] %}

   expression ->
       logical_expression {% id %}

   logical_expression ->
       comparative_expression {% id %}
       | logical_expression _ "AND" _ comparative_expression {% (d) => d[0] && d[4] %}
       | logical_expression _ "OR" _ comparative_expression {% (d) => d[0] || d[4] %}
       | logical_expression _ "XOR" _ comparative_expression {% (d) => ((d[0] && !d[4]) || (!d[0] && d[4])) %}

   comparative_expression ->
       additive_expression {% id %}
       | comparative_expression _ "EQ" _ additive_expression {% (d) => d[0] == d[4] %}
       | comparative_expression _ "NE" _ additive_expression {% (d) => d[0] != d[4] %}
       | comparative_expression _ "GT" _ additive_expression {% (d) => d[0] > d[4] %}
       | comparative_expression _ "GE" _ additive_expression {% (d) => d[0] >= d[4] %}
       | comparative_expression _ "LT" _ additive_expression {% (d) => d[0] < d[4] %}
       | comparative_expression _ "LE" _ additive_expression {% (d) => d[0] <= d[4] %}

   additive_expression ->
       multiplicative_expression {% id %}
       | additive_expression _ "+"  _ multiplicative_expression {% (d) => d[0] + d[4] %}
       | additive_expression _ "-"  _ multiplicative_expression {% (d) => d[0] - d[4] %}

   multiplicative_expression ->
       power_expression {% id %}
       | multiplicative_expression _ "*"  _ power_expression {% (d) => d[0] * d[4] %}
       | multiplicative_expression _ "/"  _ power_expression {% (d) => d[0] / d[4] %}
       | multiplicative_expression _ "MOD"  _ power_expression {% (d) => d[0] % d[4] %}

   power_expression ->
       function_expression {% id %}
       | power_expression _ "**"  _ function_expression {% (d) => Math.pow(d[0], d[4]) %}

   function_expression ->
       number {% id %}
       | "ATAN" _ gcode_expression _ "/" _ gcode_expression {% (d) => Math.degrees(Math.atan(d[2], d[6])) %}
       | "ABS" _ gcode_expression {% (d) => Math.abs(d[2]) %}
       | "ACOS" _ gcode_expression {% (d) => Math.degrees(Math.acos(d[2])) %}
       | "ASIN" _ gcode_expression {% (d) => Math.degrees(Math.asin(d[2])) %}
       | "COS" _ gcode_expression {% (d) => Math.cos(Math.radians(d[2])) %}
       | "EXP" _ gcode_expression {% (d) => Math.exp(d[2]) %}
       | "FIX" _ gcode_expression {% (d) => Math.floor(d[2]) %}
       | "FUP" _ gcode_expression {% (d) => Math.ceil(d[2]) %}
       | "ROUND" _ gcode_expression {% (d) => Math.round(d[2]) %}
       | "LN" _ gcode_expression {% (d) => Math.log(d[2]) %}
       | "SIN" _ gcode_expression {% (d) => Math.sin(Math.radians(d[2])) %}
       | "SQRT" _ gcode_expression {% (d) => Math.sqrt(d[2]) %}
       | "TAN" _ gcode_expression {% (d) => Math.tan(d[2]) %}
       | "EXISTS" _ gcode_expression {% (d) => Math.cos(d[2]) %}

   number ->
       primary {% id %}
       | _ "-" number {% (d) => d[2] * -1 %}
       | _ "+" number {% (d) => d[2] * 1 %}

   primary ->
       int_or_float {% id %}
       | gcode_expression {% id %}
       | parameter_expression {% id %}

   parameter_expression ->
       parameter_start parameter_index {% getparam %}

   parameter_index ->
       int {% id %}
       | gcode_expression {% id %}
       | parameter_expression {% id %}

   int ->
       %int {% (d) => parseInt(d[0]) %}

   int_or_float ->
       int {% id %}
       | %float {% (d) => parseFloat(d[0]) %}

   EOL ->
       %EOL {% empty %}

当我运行 nearley-test 测试我编译的解析器时，我得到以下结果：

命令：

printf "(Commentwithoutspaces)X[2**[2+5]]\n" | nearley-test gcode.js -q

结果：

processLine [ null,
  null,
  null,
  null,
  [ { command: 'COMMENT', value: '(Commentwithoutspaces)' },
    { command: 'X', value: 128 } ],
  null ]

到目前为止一切顺利，下一次测试：

printf "(Comment with spaces)X[2**[2+5]]\n" | nearley-test gcode.js -q

结果：

processLine [ null, null, null, null, { command: 'X', value: 128 }, null ]
processLine [ null, null, null, null, { command: 'X', value: 128 }, null ]
processLine [ null,
  null,
  null,
  null,
  [ { command: 'COMMENT', value: '(Comment with spaces)' },
    { command: 'X', value: 128 } ],
  null ]

看起来它在没有注释的情况下识别了两次 X 命令，然后在有注释的情况下识别了一次。只有在评论里面有空格的时候才会出现这种情况，我很疑惑...

现在，如果我用行尾包含空格的注释对其进行测试，我得到：

printf "X[2**[2+5]](Comment with spaces)\n" | nearley-test gcode.js -q

结果：

processLine [ null,
  null,
  null,
  null,
  [ { command: 'X', value: 128 },
    { command: 'COMMENT', value: '(Comment with spaces)' } ],
  null ]

所以似乎只有当我以包含空格的注释开始该行时才会发生错误，这很奇怪...

有谁知道我可以做些什么来获得更一致的行为？我的意思是我该怎么做才能使解析器每次出现时都识别一次命令和注释，而不管注释中的空格如何，也不管注释是在行中其他项目之前还是之后？

Answer 1

我发现如果我从非终端命名行中删除可选的空格并将它们添加到 "lower level" 非终端，它会按预期工作。更改后的非终端现在看起来像这样：

line ->
    block_delete:? linenumber:? line_items EOL {% processLine %}

linenumber ->
    _ %linenumber_command int_or_float _ {% function (d) { return {command: d[1].value, value: d[2]}; } %}

Nearley语法在特定条件下多次识别相同的非终端符号

Nearley grammar recognizes same non-terminal symbol multiple times under certain conditions

javascript

grammar

lexer

nearley