变量名以"false"开头,解析为常量
Variable name starts with "false" and is parsed as constant
我正在尝试使用 sly 作为词法分析器和解析器库来编写带有变量的布尔表达式解析器。我正在尝试定义不区分大小写的常量 "true" 和 "false" 但是从该常量名称开始的变量名称存在一些问题。例如,变量 "falseAlarm" 被解析为 "false" 常量和 "Alarm" 变量,因此出现语法错误。我不太擅长解析器,所以我真的不知道如何让它正确。
这是我的代码:
from sly import Lexer, Parser
from dataclasses import dataclass, field
from typing import List
from pprint import pprint
import re
class Lex(Lexer):
tokens = {
LB,
RB,
AND,
OR,
NOT,
TRUE,
FALSE,
ID,
}
ignore = ' \t'
ignore_newline = r'\n+'
LB = r'\('
RB = r'\)'
AND = r'\&\&'
OR = r'\|\|'
NOT = r'(?i)not'
TRUE = r'(?i)true'
FALSE = r'(?i)false'
ID = r'[a-zA-Z][a-zA-Z0-9_]*'
class Pax(Parser):
debugfile = 'parser.out'
tokens = Lex.tokens
@_('boolean_expression boolean_operator boolean_term')
def boolean_expression(self, p):
return (p.boolean_operator, [p.boolean_expression, p.boolean_term])
@_('boolean_term')
def boolean_expression(self, p):
return [p.boolean_term]
@_('AND')
def boolean_operator(self, p):
return p.AND
@_('OR')
def boolean_operator(self, p):
return p.OR
@_('LB boolean_expression RB')
def boolean_term(self, p):
return p.boolean_expression
@_('NOT boolean_term')
def boolean_term(self, p):
return ('not', [p.boolean_term])
@_('boolean_constant')
def boolean_term(self, p):
return p.boolean_constant
@_('ID')
def boolean_term(self, p):
return ('variable', p.ID)
@_('TRUE')
@_('FALSE')
def boolean_constant(self, p):
return ('constant', p)
def error(self, p):
if p:
print(f'Error at token {p.type}, {p.value} at line {p.lineno} col {p.index}')
self.errok()
else:
print('Syntax error at EOF')
TEXT = """
(true || false && true) || falseAlarm
"""
def tokens():
for t in Lex().tokenize(TEXT):
print(t)
yield t
res = Pax().parse(tokens())
print()
pprint(res, indent=4, width=1)
您可以更改正则表达式以包含 word boundaries,即 FALSE = r'\bfalse\b'
我正在尝试使用 sly 作为词法分析器和解析器库来编写带有变量的布尔表达式解析器。我正在尝试定义不区分大小写的常量 "true" 和 "false" 但是从该常量名称开始的变量名称存在一些问题。例如,变量 "falseAlarm" 被解析为 "false" 常量和 "Alarm" 变量,因此出现语法错误。我不太擅长解析器,所以我真的不知道如何让它正确。
这是我的代码:
from sly import Lexer, Parser
from dataclasses import dataclass, field
from typing import List
from pprint import pprint
import re
class Lex(Lexer):
tokens = {
LB,
RB,
AND,
OR,
NOT,
TRUE,
FALSE,
ID,
}
ignore = ' \t'
ignore_newline = r'\n+'
LB = r'\('
RB = r'\)'
AND = r'\&\&'
OR = r'\|\|'
NOT = r'(?i)not'
TRUE = r'(?i)true'
FALSE = r'(?i)false'
ID = r'[a-zA-Z][a-zA-Z0-9_]*'
class Pax(Parser):
debugfile = 'parser.out'
tokens = Lex.tokens
@_('boolean_expression boolean_operator boolean_term')
def boolean_expression(self, p):
return (p.boolean_operator, [p.boolean_expression, p.boolean_term])
@_('boolean_term')
def boolean_expression(self, p):
return [p.boolean_term]
@_('AND')
def boolean_operator(self, p):
return p.AND
@_('OR')
def boolean_operator(self, p):
return p.OR
@_('LB boolean_expression RB')
def boolean_term(self, p):
return p.boolean_expression
@_('NOT boolean_term')
def boolean_term(self, p):
return ('not', [p.boolean_term])
@_('boolean_constant')
def boolean_term(self, p):
return p.boolean_constant
@_('ID')
def boolean_term(self, p):
return ('variable', p.ID)
@_('TRUE')
@_('FALSE')
def boolean_constant(self, p):
return ('constant', p)
def error(self, p):
if p:
print(f'Error at token {p.type}, {p.value} at line {p.lineno} col {p.index}')
self.errok()
else:
print('Syntax error at EOF')
TEXT = """
(true || false && true) || falseAlarm
"""
def tokens():
for t in Lex().tokenize(TEXT):
print(t)
yield t
res = Pax().parse(tokens())
print()
pprint(res, indent=4, width=1)
您可以更改正则表达式以包含 word boundaries,即 FALSE = r'\bfalse\b'