变量名以"false"开头,解析为常量

Variable name starts with "false" and is parsed as constant

我正在尝试使用 sly 作为词法分析器和解析器库来编写带有变量的布尔表达式解析器。我正在尝试定义不区分大小写的常量 "true" 和 "false" 但是从该常量名称开始的变量名称存在一些问题。例如,变量 "falseAlarm" 被解析为 "false" 常量和 "Alarm" 变量,因此出现语法错误。我不太擅长解析器,所以我真的不知道如何让它正确。

这是我的代码:

from sly import Lexer, Parser
from dataclasses import dataclass, field
from typing import List
from pprint import pprint

import re


class Lex(Lexer):
    tokens = {
        LB,
        RB,
        AND,
        OR,
        NOT,
        TRUE,
        FALSE,
        ID,
    }

    ignore = ' \t'
    ignore_newline = r'\n+'

    LB = r'\('
    RB = r'\)'
    AND = r'\&\&'
    OR = r'\|\|'
    NOT = r'(?i)not'
    TRUE = r'(?i)true'
    FALSE = r'(?i)false'
    ID = r'[a-zA-Z][a-zA-Z0-9_]*'


class Pax(Parser):
    debugfile = 'parser.out'

    tokens = Lex.tokens

    @_('boolean_expression boolean_operator boolean_term')
    def boolean_expression(self, p):
        return (p.boolean_operator, [p.boolean_expression, p.boolean_term])

    @_('boolean_term')
    def boolean_expression(self, p):
        return [p.boolean_term]

    @_('AND')
    def boolean_operator(self, p):
        return p.AND

    @_('OR')
    def boolean_operator(self, p):
        return p.OR

    @_('LB boolean_expression RB')
    def boolean_term(self, p):
        return p.boolean_expression

    @_('NOT boolean_term')
    def boolean_term(self, p):
        return ('not', [p.boolean_term])

    @_('boolean_constant')
    def boolean_term(self, p):
        return p.boolean_constant

    @_('ID')
    def boolean_term(self, p):
        return ('variable', p.ID)

    @_('TRUE')
    @_('FALSE')
    def boolean_constant(self, p):
        return ('constant', p)


    def error(self, p):
        if p:
            print(f'Error at token {p.type}, {p.value} at line {p.lineno} col {p.index}')
            self.errok()
        else:
            print('Syntax error at EOF')


TEXT = """
(true || false && true) || falseAlarm
"""

def tokens():
    for t in Lex().tokenize(TEXT):
        print(t)
        yield t

res = Pax().parse(tokens())
print()
pprint(res, indent=4, width=1)

您可以更改正则表达式以包含 word boundaries,即 FALSE = r'\bfalse\b'