Python PLY Yacc "syntax error"

Python PLY Yacc "syntax error"

好的,我正在尝试为我的迷你语言构建一个解析器(显然),设置变量的部分似乎工作正常。但是一旦 Yacc 遇到函数定义,它只会给出一个语法错误和几个 EOF 错误(我知道这些 EOF 错误是在 Yacc 已经没有可匹配的规则时产生的),除此之外什么都没有发生……我哪里做错了?

这是我正在解析的语法示例:

$name = "John Doe"
$age = 72
$waterInOceans = 95.4

!testFunction {

}

!testFunction { } 部分定义了一个函数(基于感叹号)。我不知道这对调试是否有用。

# The Lexer

import ply.lex as lex

# Token names exported to PLY; the yacc module reuses this same list.
tokens = [
    "MINUS",
    "SEPARATOR",
    "MODIFIER",
    "FUNCTION_NAME",
    "UNDEF_BLOCK",
    "VARIABLE_NAME",
    "EQUALS",
    "STRING",
    "FLOAT",
    "INNER_CONTENT",
    "ARGUMENTS",
    "INTEGER",
    "PLUS",
]

def t_ARGUMENTS(t): # Matches a parenthesised argument list (calls and definitions)
    r'\(.*\)'
    # NOTE: the string above is the token's regex (PLY reads it from the
    # docstring), so it must stay the first statement of the function.
    inner = t.value[1:-1]  # drop the surrounding parentheses
    t.value = inner.split(" && ")  # arguments are separated by " && "
    return t

def t_STRING(t): # finds double-quoted strings
    r'"[^"\n]*"'
    # NOTE: the string above is the token's regex (PLY reads it from the
    # docstring).  It matches from an opening quote to the *next* quote on the
    # same line, so one-character and empty strings are accepted and two
    # strings on one line are lexed as two tokens instead of one.
    t.value = t.value[1:-1] # strips the quotation marks of the string
    return t

def t_FLOAT(t): # finds floats
    r'\d+\.\d+'
    # NOTE: the string above is the token's regex (PLY reads it from the
    # docstring).  The dot is escaped so that only a literal decimal point is
    # accepted; an unescaped "." would also match input such as "10 20" and
    # make float() below raise ValueError.
    t.value = float(t.value)
    return t

def t_INTEGER(t): # finds whole numbers
    r'\d+'
    # NOTE: the string above is the token's regex (PLY reads it from the
    # docstring).
    digits = t.value
    t.value = int(digits)  # hand the parser a real int, not the lexeme
    return t

def t_VARIABLE_NAME(t): # finds variable names such as $name
    r'\$\w+'
    # NOTE: the string above is the token's regex (PLY reads it from the
    # docstring).  The dollar sign is escaped because a bare "$" is the regex
    # end-of-input anchor and would never match the "$" prefix of a variable.
    t.value = t.value[1:] # strip the leading "$"
    return t

def t_INNER_CONTENT(t): # finds a brace-delimited body
    r'\{\n.*\n\}|\{.*\}'
    # NOTE: the string above is the token's regex (PLY reads it from the
    # docstring).  It accepts either a body on one line between braces, or a
    # single line wrapped by a newline after "{" and before "}".
    body = t.value[1:-1]  # drop the opening and closing brace
    t.value = body
    return t

def t_FUNCTION_NAME(t): # finds function names such as !testFunction
    r'!\w+'
    # NOTE: the string above is the token's regex (PLY reads it from the
    # docstring).
    name = t.value[1:]  # drop the leading "!"
    t.value = name
    return t

# Characters skipped between tokens.  PLY treats t_ignore as a literal set of
# characters, NOT a regular expression, so the characters are listed directly:
# space, tab, carriage return and newline.  (Written as a raw "regex" such as
# r"\n|\t|\r" it would instead ignore the letters n, t, r, "|" and "\".)
t_ignore = " \t\r\n"

# Simple single-character tokens, given as regex strings.
t_EQUALS = r"\="
t_PLUS = r"\+"
t_MINUS = r"-"
t_MODIFIER = r"\."
t_SEPARATOR = r"\,"

t_UNDEF_BLOCK = r"\w+" # Any block of text that is left over and isn't assigned by the end (used by functions)

def t_error(t):
    # Called for a character no rule matches: step over exactly one character
    # without reporting anything, so lexing continues with the next one.
    lexer = t.lexer
    lexer.skip(1)

# Build the lexer from the token rules defined above in this module.
lex.lex()

# Disabled manual test: reads example.zeq and feeds its text to the lexer.
#opened = open("example.zeq", "r")
#content = opened.read()
#opened.close()

#lex.input(content)

然后是 Yacc 这一部分:

# The Yacc parser

import ply.yacc as yacc
import compiler # Get the compiler (tokenizer; compiler.py) which generates tokens
import sys
from os import system


# ---- boilerplate (not relevant to the grammar) ----
# Reuse the token list declared by the lexer module.
tokens = compiler.tokens
#system("clear")
# Banner: the script path followed by a dashed underline 12 characters longer.
print("Executing " + sys.argv[1] + " |", "-" * (len(sys.argv[1]) + 12), sep="\n")
# ---- end of boilerplate ----


# Interpreter state filled in by the grammar actions below.
VARIABLES = dict()  # variable name -> assigned value (written by p_assign)
FUNCTIONS = dict()  # function name -> body text (written by p_function)

def p_assign(p): # Set new variable
    '''assignment : VARIABLE_NAME EQUALS compound
                  | VARIABLE_NAME EQUALS STRING
                  | VARIABLE_NAME EQUALS INTEGER
                  | VARIABLE_NAME EQUALS FLOAT'''

    # NOTE: the string above is the grammar rule (PLY reads it from the
    # docstring).  This is the first rule in the module, so PLY uses
    # `assignment` as the grammar's start symbol.
    # p[1] is the variable name, p[3] the value on the right of "=".
    target, value = p[1], p[3]
    VARIABLES[target] = value

def p_number(p): # Blanket non-terminal covering both floats and integers
    '''number : FLOAT
              | INTEGER'''
    # NOTE: the string above is the grammar rule (PLY reads it from the
    # docstring).  The numeric token's value is passed through unchanged.
    numeric_value = p[1]
    p[0] = numeric_value

def p_compound(p): # Complete the value *before* the variable is assigned!
    '''compound : number PLUS number
                | number MINUS number'''

    # NOTE: the string above is the grammar rule (PLY reads it from the
    # docstring).
    # p[1] and p[3] are the operand values, p[2] is the operator lexeme.
    # The result is stored in p[0]; p[0] is left untouched for any operator
    # other than "+" or "-".
    left, operator, right = p[1], p[2], p[3]
    if operator == "+":
        p[0] = left + right
    elif operator == "-":
        p[0] = left - right

def p_undefined(p):
    '''undefined : UNDEF_BLOCK'''
    # NOTE: the string above is the grammar rule (PLY reads it from the
    # docstring).  The action only reports the match; p[0] is not set.
    message = "Undefined block"
    print(message)

def p_function(p):
    '''function : FUNCTION_NAME INNER_CONTENT'''

    # NOTE: the string above is the grammar rule (PLY reads it from the
    # docstring).
    print("Creating a function")

    # p[1] is the function name, p[2] its body text; register the pair.
    FUNCTIONS[p[1]] = p[2]

def p_empty(p):
    '''empty : '''
    # NOTE: the string above is the grammar rule (PLY reads it from the
    # docstring): an empty production, so there is nothing to do.
    pass

#~ def p_error(p):
    #~ if p:
        #~ print("Syntax error: "+p.type)
    #~ else:
        #~ pass

# Build the parser from the p_* grammar rules defined in this module.
parser = yacc.yacc()

# Read the whole script named on the command line; the context manager closes
# the file even if reading raises.
with open(sys.argv[1], "r") as opened:
    content = opened.read()

# The input is parsed one line at a time.
for line in content.splitlines():
    if not line.strip():
        # Blank lines contain no tokens; the grammar cannot match empty input,
        # so parsing them would only report an EOF error.
        continue
    parser.parse(line)

print(VARIABLES)
print(FUNCTIONS)

我估计这只是一个被我忽略的简单细节……

当您要求 Ply(或 yacc)解析输入时,它会尝试识别顶层非终结符(即“起始符号”,starting symbol)的单个实例。起始符号通常是对整个输入的语法描述,因此它通常有一个类似 program 的名称,不过在某些用例中只解析输入的一部分也很有用。

Ply(和 yacc)默认把第一条文法产生式当作起始符号的产生式。在您的例子中,第一条产生式是 assignment,因此解析器只会尝试解析 assignment(而不会解析其他任何东西)。assignment 无法推导出函数定义或任何其他语句类型,因此这些输入都会导致语法错误。

如果您想明确告诉 Ply 顶层符号是什么,也可以做到:在语法模块中设置 start = '符号名',或者调用 yacc.yacc(start='符号名')。参见 Ply 手册中关于起始符号(starting symbols)的章节。