使用 PLY 强制执行文件结构
Enforce structure of file with PLY
我正在尝试处理包含算法行为描述的输入文件。我正在使用 python 的 PLY 模块来定义词法分析器和解析器。我偶然发现了定义语法的问题,该语法将强制用户正确编写此文件。
文件
# Beginning of the first section
STATES = INITIATOR, IDLE, DONE;
INIT = INITIATOR, IDLE;
TERM = DONE;
# End of first section
# Beginning of the second section
INITIATOR
RANDOM
begin
SEND(x, NEIGHBORS);
BECOME(DONE);
end
IDLE
RECEIVE(x)
begin
SEND(x, NEIGHBORS);
BECOME(DONE);
end
# End of second section
词法分析器
import ply.lex as lex
from soda.helpers import prepare_file
class Lexer(object):
    """PLY lexer for the algorithm-behaviour description file.

    Produces one token type per reserved word listed in ``keywords``,
    a generic ``NAME`` token for every other alphabetic word, and
    punctuation tokens. ``build`` must be called before the lexer is used.
    """

    # Reserved words. Each one is its own token type. They are recognised
    # inside t_NAME instead of through separate t_<KEYWORD> string rules:
    # PLY tries function rules before string rules, so such string rules
    # could never match anyway.
    keywords = (
        'INIT', 'TERM', 'STATES', 'REGISTERS',
        'begin', 'end',
        'SEND', 'BECOME',
    )

    tokens = keywords + (
        'NAME', 'EQUALS', 'COMMA', 'SEMICOLON',
        'LPAREN', 'RPAREN',
    )

    # Punctuation tokens
    t_EQUALS = r'='
    t_COMMA = r','
    t_SEMICOLON = r';'
    t_LPAREN = r'\('
    t_RPAREN = r'\)'

    # Ignored characters. Newlines are consumed by t_newline so that line
    # numbers can be tracked.
    t_ignore = ' \t'

    def t_newline(self, t):
        r'\n+'
        # PLY does not count lines by itself; without this rule every
        # token (and every t_error message) would report line 1.
        t.lexer.lineno += len(t.value)

    def t_NAME(self, t):
        r'[a-zA-Z][a-zA-Z]*'
        # Promote reserved words from NAME to their own token type.
        if t.value in self.keywords:
            t.type = t.value
        return t

    def t_error(self, t):
        # Report the offending character and skip it so lexing continues.
        print ("Illegal character {0} at line {1}".format(t.value[0], t.lineno))
        t.lexer.skip(1)

    def build(self, **kwargs):
        """Create the underlying PLY lexer; kwargs are passed to lex.lex."""
        self._lexer = lex.lex(module=self, **kwargs)

    @prepare_file
    def lexical_analysis(self, file):
        """Tokenize *file* line by line and print every token found."""
        print ("Started lexical analysis...")
        for line in file:
            self._lexer.input(line)
            # token() returns None once the current line is exhausted.
            for token in iter(self._lexer.token, None):
                print (" ", token)
解析器
import ply.yacc as yacc
from soda.helpers import prepare_file
class Parser(object):
    # Grammar and driver for the algorithm description file.
    #
    # The docstring of every p_* method is the grammar production that
    # PLY reads, so explanations are written as comments instead.

    # --- top-level rule ---------------------------------------------------

    def p_algorithm(self, p):
        ''' algorithm : first_section second_section'''

    # --- first section: one declaration terminated by a semicolon ---------

    def p_first_section(self, p):
        ''' first_section : STATES EQUALS states_list SEMICOLON
                          | REGISTERS EQUALS register_list SEMICOLON
                          | INIT EQUALS init_list SEMICOLON
                          | TERM EQUALS term_list SEMICOLON'''

    # Each *_list rule is a comma-separated list of names; the matching
    # *_term rule records the name on the behavior object.

    def p_states_list(self, p):
        ''' states_list : state_term
                        | states_list COMMA state_term'''

    def p_state_term(self, p):
        ''' state_term : NAME'''
        self.behavior.states.append(p[1])

    def p_register_list(self, p):
        ''' register_list : register_term
                          | register_list COMMA register_term'''

    def p_register_term(self, p):
        ''' register_term : NAME'''
        self.behavior.registers.append(p[1])

    def p_init_list(self, p):
        ''' init_list : init_term
                      | init_list COMMA init_term'''

    def p_init_term(self, p):
        ''' init_term : NAME'''
        self.behavior.init_states.append(p[1])

    def p_term_list(self, p):
        ''' term_list : term_term
                      | term_list COMMA term_term'''

    def p_term_term(self, p):
        ''' term_term : NAME'''
        self.behavior.term_states.append(p[1])

    # --- second section ---------------------------------------------------

    # NOTE(review): `commands` has no production in the code shown here.
    def p_second_section(self, p):
        ''' second_section : NAME begin commands end'''

    # --- error reporting --------------------------------------------------

    def p_error(self, p):
        # p is the offending token as passed in by PLY; it is printed as-is.
        print("Syntax error in input! -> {}".format(p))

    # --- construction and driver ------------------------------------------

    def build(self, lexer, behavior):
        # Keep the collaborators, then let PLY build the tables from the
        # p_* methods; `tokens` has to be set before yacc.yacc() runs.
        self.behavior = behavior
        self.lexer = lexer
        self.tokens = lexer.tokens
        self._parser = yacc.yacc(module=self)

    @prepare_file
    def parsing(self, file):
        # Echo every line and run the parser on that single line, so each
        # line has to be a complete `algorithm` on its own.
        for line in file:
            print (line)
            self._parser.parse(line, lexer=self.lexer._lexer)
解析导致语法错误,我不确定如何定义规则来强制文件与算法行为的一致性。 first_section 解析正常,问题是 second_section。我的解决方案定义了 algorithm : first_section second_section 并且它不起作用。我试图将其定义为 algorithm: first_section | second_section 效果很好,但这条规则规定第一和第二部分可以在文件中切换。
所以我的问题是如何使用规则强制执行它,以便用户保持输入文件的一致性。
错误输出
STATES = INITIATOR, IDLE, DONE;
Syntax error in input! -> None
INIT = INITIATOR, IDLE;
Syntax error in input! -> None
TERM = DONE;
Syntax error in input! -> None
INITIATOR
Syntax error in input! -> LexToken(NAME,'INITIATOR',1,0)
begin
Syntax error in input! -> LexToken(begin,'begin',1,0)
程序只是说语法有错误。问题不在于词法分析,而在于定义的语法。我可以把语法定义成能够接受该输入的形式,但那样的话,例如用户就可以把 first_section 和 second_section 的顺序对调。
编辑
我觉得这个问题不清楚我想达到什么目的或者我的问题,所以我投票关闭它。我想到了如何更好地陈述我正在寻找的东西,所以我想提出新问题。
哎呀!您的语法逐行解析文件,这至少不常见,并且无法控制各行的顺序。恕我直言,您应该将文件作为一个整体进行解析。诀窍是向解析器传递一个 tokenfunc 函数,该函数每次向词法分析器提供一行,并在语法中声明每个部分由若干行组成:
class Parser(object):
    # Whole-file variant of the parser: the grammar describes the complete
    # file and the lexer is refilled one line at a time while parsing.
    # The docstring of every p_* method is the grammar production that
    # PLY reads, so explanations are written as comments instead.

    def p_algorithm(self, p):
        ''' algorithm : first_section second_section'''

    # The first section is one or more declaration lines.
    def p_first_section(self, p):
        ''' first_section : first_section_line
                          | first_section_line first_section'''

    def p_first_section_line(self, p):
        ''' first_section_line : STATES EQUALS states_list SEMICOLON
                               | REGISTERS EQUALS register_list SEMICOLON
                               | INIT EQUALS init_list SEMICOLON
                               | TERM EQUALS term_list SEMICOLON'''

    ...
    # same for second section...

    @prepare_file
    def parsing(self, file):
        """Parse all of *file* in a single parse() call.

        *file* may be any iterable of lines. It is wrapped in iter() so
        that a list works as well as an open file object; calling next()
        directly on a list would raise TypeError.
        """
        lines = iter(file)
        lexer = self.lexer._lexer

        def get_token():
            'a tokenizer that automatically feeds the lexer with the next line'
            while True:
                tok = lexer.token()
                if tok is not None:
                    return tok
                # Current line exhausted: load the next one, or signal
                # end of input to the parser when there is none left.
                try:
                    lexer.input(next(lines))
                except StopIteration:
                    return None

        # The input string is empty on purpose: get_token supplies all text.
        self._parser.parse("", lexer=lexer, tokenfunc=get_token)
我正在尝试处理包含算法行为描述的输入文件。我正在使用 python 的 PLY 模块来定义词法分析器和解析器。我偶然发现了定义语法的问题,该语法将强制用户正确编写此文件。
文件
# Beginning of the first section
STATES = INITIATOR, IDLE, DONE;
INIT = INITIATOR, IDLE;
TERM = DONE;
# End of first section
# Beginning of the second section
INITIATOR
RANDOM
begin
SEND(x, NEIGHBORS);
BECOME(DONE);
end
IDLE
RECEIVE(x)
begin
SEND(x, NEIGHBORS);
BECOME(DONE);
end
# End of second section
词法分析器
import ply.lex as lex
from soda.helpers import prepare_file
class Lexer(object):
    """PLY lexer for the algorithm-behaviour description file.

    Produces one token type per reserved word listed in ``keywords``,
    a generic ``NAME`` token for every other alphabetic word, and
    punctuation tokens. ``build`` must be called before the lexer is used.
    """

    # Reserved words. Each one is its own token type. They are recognised
    # inside t_NAME instead of through separate t_<KEYWORD> string rules:
    # PLY tries function rules before string rules, so such string rules
    # could never match anyway.
    keywords = (
        'INIT', 'TERM', 'STATES', 'REGISTERS',
        'begin', 'end',
        'SEND', 'BECOME',
    )

    tokens = keywords + (
        'NAME', 'EQUALS', 'COMMA', 'SEMICOLON',
        'LPAREN', 'RPAREN',
    )

    # Punctuation tokens
    t_EQUALS = r'='
    t_COMMA = r','
    t_SEMICOLON = r';'
    t_LPAREN = r'\('
    t_RPAREN = r'\)'

    # Ignored characters. Newlines are consumed by t_newline so that line
    # numbers can be tracked.
    t_ignore = ' \t'

    def t_newline(self, t):
        r'\n+'
        # PLY does not count lines by itself; without this rule every
        # token (and every t_error message) would report line 1.
        t.lexer.lineno += len(t.value)

    def t_NAME(self, t):
        r'[a-zA-Z][a-zA-Z]*'
        # Promote reserved words from NAME to their own token type.
        if t.value in self.keywords:
            t.type = t.value
        return t

    def t_error(self, t):
        # Report the offending character and skip it so lexing continues.
        print ("Illegal character {0} at line {1}".format(t.value[0], t.lineno))
        t.lexer.skip(1)

    def build(self, **kwargs):
        """Create the underlying PLY lexer; kwargs are passed to lex.lex."""
        self._lexer = lex.lex(module=self, **kwargs)

    @prepare_file
    def lexical_analysis(self, file):
        """Tokenize *file* line by line and print every token found."""
        print ("Started lexical analysis...")
        for line in file:
            self._lexer.input(line)
            # token() returns None once the current line is exhausted.
            for token in iter(self._lexer.token, None):
                print (" ", token)
解析器
import ply.yacc as yacc
from soda.helpers import prepare_file
class Parser(object):
    # Grammar and driver for the algorithm description file.
    #
    # The docstring of every p_* method is the grammar production that
    # PLY reads, so explanations are written as comments instead.

    # --- top-level rule ---------------------------------------------------

    def p_algorithm(self, p):
        ''' algorithm : first_section second_section'''

    # --- first section: one declaration terminated by a semicolon ---------

    def p_first_section(self, p):
        ''' first_section : STATES EQUALS states_list SEMICOLON
                          | REGISTERS EQUALS register_list SEMICOLON
                          | INIT EQUALS init_list SEMICOLON
                          | TERM EQUALS term_list SEMICOLON'''

    # Each *_list rule is a comma-separated list of names; the matching
    # *_term rule records the name on the behavior object.

    def p_states_list(self, p):
        ''' states_list : state_term
                        | states_list COMMA state_term'''

    def p_state_term(self, p):
        ''' state_term : NAME'''
        self.behavior.states.append(p[1])

    def p_register_list(self, p):
        ''' register_list : register_term
                          | register_list COMMA register_term'''

    def p_register_term(self, p):
        ''' register_term : NAME'''
        self.behavior.registers.append(p[1])

    def p_init_list(self, p):
        ''' init_list : init_term
                      | init_list COMMA init_term'''

    def p_init_term(self, p):
        ''' init_term : NAME'''
        self.behavior.init_states.append(p[1])

    def p_term_list(self, p):
        ''' term_list : term_term
                      | term_list COMMA term_term'''

    def p_term_term(self, p):
        ''' term_term : NAME'''
        self.behavior.term_states.append(p[1])

    # --- second section ---------------------------------------------------

    # NOTE(review): `commands` has no production in the code shown here.
    def p_second_section(self, p):
        ''' second_section : NAME begin commands end'''

    # --- error reporting --------------------------------------------------

    def p_error(self, p):
        # p is the offending token as passed in by PLY; it is printed as-is.
        print("Syntax error in input! -> {}".format(p))

    # --- construction and driver ------------------------------------------

    def build(self, lexer, behavior):
        # Keep the collaborators, then let PLY build the tables from the
        # p_* methods; `tokens` has to be set before yacc.yacc() runs.
        self.behavior = behavior
        self.lexer = lexer
        self.tokens = lexer.tokens
        self._parser = yacc.yacc(module=self)

    @prepare_file
    def parsing(self, file):
        # Echo every line and run the parser on that single line, so each
        # line has to be a complete `algorithm` on its own.
        for line in file:
            print (line)
            self._parser.parse(line, lexer=self.lexer._lexer)
解析导致语法错误,我不确定如何定义规则来强制文件与算法行为的一致性。 first_section 解析正常,问题是 second_section。我的解决方案定义了 algorithm : first_section second_section 并且它不起作用。我试图将其定义为 algorithm: first_section | second_section 效果很好,但这条规则规定第一和第二部分可以在文件中切换。
所以我的问题是如何使用规则强制执行它,以便用户保持输入文件的一致性。
错误输出
STATES = INITIATOR, IDLE, DONE;
Syntax error in input! -> None
INIT = INITIATOR, IDLE;
Syntax error in input! -> None
TERM = DONE;
Syntax error in input! -> None
INITIATOR
Syntax error in input! -> LexToken(NAME,'INITIATOR',1,0)
begin
Syntax error in input! -> LexToken(begin,'begin',1,0)
程序只是说语法有错误。问题不在于词法分析,而在于定义的语法。我可以把语法定义成能够接受该输入的形式,但那样的话,例如用户就可以把 first_section 和 second_section 的顺序对调。
编辑
我觉得这个问题不清楚我想达到什么目的或者我的问题,所以我投票关闭它。我想到了如何更好地陈述我正在寻找的东西,所以我想提出新问题。
哎呀!您的语法逐行解析文件,这至少不常见,并且无法控制各行的顺序。恕我直言,您应该将文件作为一个整体进行解析。诀窍是向解析器传递一个 tokenfunc 函数,该函数每次向词法分析器提供一行,并在语法中声明每个部分由若干行组成:
class Parser(object):
    # Whole-file variant of the parser: the grammar describes the complete
    # file and the lexer is refilled one line at a time while parsing.
    # The docstring of every p_* method is the grammar production that
    # PLY reads, so explanations are written as comments instead.

    def p_algorithm(self, p):
        ''' algorithm : first_section second_section'''

    # The first section is one or more declaration lines.
    def p_first_section(self, p):
        ''' first_section : first_section_line
                          | first_section_line first_section'''

    def p_first_section_line(self, p):
        ''' first_section_line : STATES EQUALS states_list SEMICOLON
                               | REGISTERS EQUALS register_list SEMICOLON
                               | INIT EQUALS init_list SEMICOLON
                               | TERM EQUALS term_list SEMICOLON'''

    ...
    # same for second section...

    @prepare_file
    def parsing(self, file):
        """Parse all of *file* in a single parse() call.

        *file* may be any iterable of lines. It is wrapped in iter() so
        that a list works as well as an open file object; calling next()
        directly on a list would raise TypeError.
        """
        lines = iter(file)
        lexer = self.lexer._lexer

        def get_token():
            'a tokenizer that automatically feeds the lexer with the next line'
            while True:
                tok = lexer.token()
                if tok is not None:
                    return tok
                # Current line exhausted: load the next one, or signal
                # end of input to the parser when there is none left.
                try:
                    lexer.input(next(lines))
                except StopIteration:
                    return None

        # The input string is empty on purpose: get_token supplies all text.
        self._parser.parse("", lexer=lexer, tokenfunc=get_token)