解析 python 中的嵌套函数
parsing nested functions in python
line = "add(multiply(add(2,3),add(4,5)),1)"
def readLine(line):
    """Take the prefix of ``line`` up to its first ")" and dispatch on its "(" count.

    The prefix (including that ")") is handed to ``cutString`` when it
    contains more than one "(", otherwise to ``execute``.

    NOTE(review): ``execute`` is not defined anywhere in the visible source;
    presumably it is meant to run a single ``name(arg,arg)`` call - confirm.
    """
    countLeftBracket=0
    string = ""
    # Copy characters up to and including the first ")".
    for char in line:
        if char !=")":
            string += char
        else:
            string +=char
            break
    # Count the "(" characters in that prefix.
    for i in string:
        if i=="(":
            countLeftBracket+=1
    if countLeftBracket>1:
        # The value produced by cutString is not returned, so this branch
        # makes readLine return None.
        cutString(string)
    else:
        return execute(string)
def cutString(string):
    """Appears intended to strip the text up to the first "(" and then recurse or execute.

    NOTE(review): ``str.replace`` returns a new string and the result is
    discarded on both calls below, so ``string`` is unchanged when the
    brackets are counted. With more than one "(" the function therefore
    calls itself with the same argument every time, which matches the
    "maximum recursion depth exceeded" error reported for this code.
    NOTE(review): ``execute`` is not defined in the visible source.
    """
    countLeftBracket=0
    for char in string:
        if char!="(":
            string.replace(char,'')  # result discarded; replace would drop every occurrence of char
        elif char=="(":
            string.replace(char,'')  # result discarded
            break
    # Count the "(" characters of the (unchanged) string.
    for char in string:
        if char=="(":
            countLeftBracket+=1
    if countLeftBracket>1:
        cutString(string)  # same string again; the return value is dropped
    elif countLeftBracket==1:
        return execute(string)
def add(num1, num2):
    """Print and return the sum of ``num1`` and ``num2``.

    The sum is returned (``print`` itself always returns None) so that the
    result can be used as an argument of an enclosing call.
    """
    result = num1 + num2
    print(result)
    return result
def multiply(num1, num2):
    """Print and return the product of ``num1`` and ``num2``.

    The product is returned (``print`` itself always returns None) so that
    the result can be used as an argument of an enclosing call.
    """
    result = num1 * num2
    print(result)
    return result
# The function defined above is named readLine (singular).
readLine(line)
我需要执行整行字符串。我试图一个一个地删除括号内的每个函数并用结果替换它们,但我有点迷路了。不确定如何继续,我的代码出现错误:
File "main.py", line 26, in cutString
if char!="(":
RuntimeError: maximum recursion depth exceeded in comparison
请给我一些思路:接下来该往哪个方向走,应该使用哪种方法?
您可以使用生成器函数构建一个非常简单的解析器:
import re, operator
line, f = "add(multiply(add(2,3),add(4,5)),1)", {'add':operator.add, 'multiply':operator.mul}
def parse(d):
    """Lazily group a flat token iterator into nested token generators.

    ``d`` must be an iterator of string tokens (it is advanced with
    ``next``). Plain tokens are yielded unchanged; every "(" yields a nested
    generator that produces the tokens up to the matching ")". Generation
    stops at the first ")" met at this level or when ``d`` is exhausted.

    All nesting levels share ``d``, so a nested generator has to be consumed
    completely before the enclosing one is advanced.
    """
    # Loop instead of recursing once per token: a chain of ``yield from``
    # calls grows by one generator per token and exceeds the interpreter's
    # recursion limit on long inputs.
    while True:
        n = next(d, None)
        if n is None or n == ')':
            return
        if n == '(':
            yield parse(d)
        else:
            yield n
# Split the line into "(", ")" and word tokens (commas are dropped), then
# group them lazily. The pattern is a raw string so that "\(" and "\w" reach
# the regex engine verbatim instead of being read as invalid string escapes.
parsed = parse(iter(re.findall(r'\(|\)|\w+', line)))
def _eval(d):
_r = []
n = next(d, None)
while n is not None:
if n.isdigit():
_r.append(int(n))
else:
_r.append(f[n](*_eval(next(d))))
n = next(d, None)
return _r
print(_eval(parsed)[0])
输出:
46
听起来这可以用正则表达式解决。
所以这是一个单一归约的例子
import re, operator
def apply(match):
    """Evaluate one matched call and return its result as a string.

    ``match.group(1)`` is the function name (the text outside the
    parentheses) and ``match.group(2)`` the comma-separated integer
    arguments found between them.
    """
    name = match.group(1)
    numbers = tuple(int(part) for part in match.group(2).split(','))
    table = {"add": operator.add, "multiply": operator.mul}
    outcome = table[name](*numbers)
    return str(outcome)
def single_step(line):
    """Return ``line`` with every call that has no nested parentheses replaced by its value.

    Each such ``name(args)`` occurrence is passed to ``apply`` and replaced
    by the string it returns.
    """
    innermost_call = re.compile(r"([a-z]+)\(([^()]+)\)")
    return innermost_call.sub(apply, line)
例如:
line = "add(multiply(add(2,3),add(4,5)),1)"
print(single_step(line))
会输出:
add(multiply(5,9),1)
剩下要做的就是循环直到表达式是一个数字
# Reduce innermost calls until a pass changes nothing. Stopping at a fixed
# point (instead of testing line.isdigit()) also terminates when the final
# value is negative or the text cannot be reduced any further.
while True:
    reduced = single_step(line)
    if reduced == line:
        break
    line = reduced
print(line)
会显示
46
这是一个使用 pyparsing 的解决方案,因此更容易扩展:
from pyparsing import *
首先是一个方便的函数(使用第二个标记函数并打印解析树以查看原因)
def tag(name):
    """Build a parse action that labels its tokens with ``name``.

    The returned function calls ``tokens.asList()`` and returns the tuple
    ``(name, *items)``. The one exception is an ``'expr'`` wrapping a single
    item: there the bare list is returned, so ``["expr", 4]`` collapses to 4.
    """
    def tagfn(tokens):
        items = tokens.asList()
        if name != 'expr' or len(items) != 1:
            return (name, *items)
        # a lone item under 'expr' is an LL(1) artifact; drop the wrapper
        return items
    return tagfn
# def tag(name):
# return lambda tokens: tuple([name] + tokens.asList())
我们的词法分析器需要识别左右括号、整数和名称。这就是你用 pyparsing 定义它们的方式:
# Parentheses are matched but suppressed; they do not show up in the parse
# results printed further down.
LPAR = Suppress("(")
RPAR = Suppress(")")
# The parse action replaces the matched text with its int value.
integer = Word(nums).setParseAction(lambda s,l,t: [int(t[0])])
# A function name (presumably a run of letters - see pyparsing's `alphas`).
name = Word(alphas)
我们的语法包含函数调用,函数调用以零个或多个表达式作为参数。函数调用本身也是表达式,因此为了处理这种循环引用,我们必须对 expr 和 fncall 进行前向声明(Forward):
# expr and fncall refer to each other, so both start out as Forward
# placeholders and receive their definitions with "<<" below.
expr = Forward()
fncall = Forward()
# An expression is an integer or a function call; tag('expr') post-processes its tokens.
expr << (integer | fncall).setParseAction(tag('expr'))
# Argument list built from expressions (presumably comma-separated, the
# default delimiter of delimitedList - confirm against the pyparsing docs).
fnparams = delimitedList(expr)
# A name followed by a parenthesised, optional argument list; Group keeps the
# arguments together as one nested list in the result.
fncall << (name + Group(LPAR + Optional(fnparams, default=[]) + RPAR)).setParseAction(tag('fncall'))
现在我们可以解析我们的字符串(我们也可以向函数添加空格和多于或少于两个参数):
line = "add(multiply(add(2,3),add(4,5)),1)"
res = fncall.parseString(line)
要查看返回的内容,您可以打印它,这称为解析树(或者,由于我们的标记函数对其进行了简化,因此称为抽象语法树):
import pprint
pprint.pprint(list(res))
输出:
[('fncall',
'add',
[('fncall',
'multiply',
[('fncall', 'add', [2, 3]), ('fncall', 'add', [4, 5])]),
1])]
如果改用被注释掉的那个 tag 函数,输出如下(这只会增加后续处理的工作量,没有额外的好处):
[('fncall',
'add',
[('expr',
('fncall',
'multiply',
[('expr', ('fncall', 'add', [('expr', 2), ('expr', 3)])),
('expr', ('fncall', 'add', [('expr', 4), ('expr', 5)]))])),
('expr', 1)])]
现在定义我们程序可用的函数:
# reduce is a builtin only on Python 2; functools provides it on Python 3 as well.
from functools import reduce

# Functions callable from a parsed expression; each accepts any number of
# arguments and falls back to its identity value (0 or 1) when given none.
FUNCTIONS = {
    'add': lambda *args: sum(args, 0),
    'multiply': lambda *args: reduce(lambda a, b: a*b, args, 1),
}
# print FUNCTIONS['multiply'](1,2,3,4) # test that it works ;-)
我们的解析器现在写起来非常简单:
def parse(ast):
    """Evaluate a tagged parse tree and return its value.

    ``ast`` may be:
      * a ``('fncall', name, [args])`` tuple - the argument list is
        evaluated recursively and passed to ``FUNCTIONS[name]``;
      * a list - a list with every item evaluated is returned;
      * an int - returned unchanged.
    Anything else (None included) evaluates to None.

    Raises KeyError when ``name`` is not a key of ``FUNCTIONS``.
    """
    # Dispatch on type, not truthiness: 0 and [] are falsy, yet they are a
    # valid literal and a valid (empty) argument list respectively.
    if isinstance(ast, tuple) and ast and ast[0] == 'fncall':
        # ast is here ('fncall', <name-of-function>, [list-of-arguments])
        fn_name = ast[1]
        fn_args = parse(ast[2])  # handled by the list branch below
        return FUNCTIONS[fn_name](*fn_args)
    if isinstance(ast, list):
        # a parameter list: evaluate every parameter
        return [parse(item) for item in ast]
    if isinstance(ast, int):
        return ast
    return None
现在根据词法分析阶段的结果调用解析器:
>>> print(parse(res[0])) # the outermost item is an expression
46
line = "add(multiply(add(2,3),add(4,5)),1)"
def readLine(line):
    """Take the prefix of ``line`` up to its first ")" and dispatch on its "(" count.

    The prefix (including that ")") is handed to ``cutString`` when it
    contains more than one "(", otherwise to ``execute``.

    NOTE(review): ``execute`` is not defined anywhere in the visible source;
    presumably it is meant to run a single ``name(arg,arg)`` call - confirm.
    """
    countLeftBracket=0
    string = ""
    # Copy characters up to and including the first ")".
    for char in line:
        if char !=")":
            string += char
        else:
            string +=char
            break
    # Count the "(" characters in that prefix.
    for i in string:
        if i=="(":
            countLeftBracket+=1
    if countLeftBracket>1:
        # The value produced by cutString is not returned, so this branch
        # makes readLine return None.
        cutString(string)
    else:
        return execute(string)
def cutString(string):
    """Appears intended to strip the text up to the first "(" and then recurse or execute.

    NOTE(review): ``str.replace`` returns a new string and the result is
    discarded on both calls below, so ``string`` is unchanged when the
    brackets are counted. With more than one "(" the function therefore
    calls itself with the same argument every time, which matches the
    "maximum recursion depth exceeded" error reported for this code.
    NOTE(review): ``execute`` is not defined in the visible source.
    """
    countLeftBracket=0
    for char in string:
        if char!="(":
            string.replace(char,'')  # result discarded; replace would drop every occurrence of char
        elif char=="(":
            string.replace(char,'')  # result discarded
            break
    # Count the "(" characters of the (unchanged) string.
    for char in string:
        if char=="(":
            countLeftBracket+=1
    if countLeftBracket>1:
        cutString(string)  # same string again; the return value is dropped
    elif countLeftBracket==1:
        return execute(string)
def add(num1, num2):
    """Print and return the sum of ``num1`` and ``num2``.

    The sum is returned (``print`` itself always returns None) so that the
    result can be used as an argument of an enclosing call.
    """
    result = num1 + num2
    print(result)
    return result
def multiply(num1, num2):
    """Print and return the product of ``num1`` and ``num2``.

    The product is returned (``print`` itself always returns None) so that
    the result can be used as an argument of an enclosing call.
    """
    result = num1 * num2
    print(result)
    return result
# The function defined above is named readLine (singular).
readLine(line)
我需要执行整行字符串。我试图一个一个地删除括号内的每个函数并用结果替换它们,但我有点迷路了。不确定如何继续,我的代码出现错误:
File "main.py", line 26, in cutString
if char!="(":
RuntimeError: maximum recursion depth exceeded in comparison
请给我一些思路:接下来该往哪个方向走,应该使用哪种方法?
您可以使用生成器函数构建一个非常简单的解析器:
import re, operator
line, f = "add(multiply(add(2,3),add(4,5)),1)", {'add':operator.add, 'multiply':operator.mul}
def parse(d):
    """Lazily group a flat token iterator into nested token generators.

    ``d`` must be an iterator of string tokens (it is advanced with
    ``next``). Plain tokens are yielded unchanged; every "(" yields a nested
    generator that produces the tokens up to the matching ")". Generation
    stops at the first ")" met at this level or when ``d`` is exhausted.

    All nesting levels share ``d``, so a nested generator has to be consumed
    completely before the enclosing one is advanced.
    """
    # Loop instead of recursing once per token: a chain of ``yield from``
    # calls grows by one generator per token and exceeds the interpreter's
    # recursion limit on long inputs.
    while True:
        n = next(d, None)
        if n is None or n == ')':
            return
        if n == '(':
            yield parse(d)
        else:
            yield n
# Split the line into "(", ")" and word tokens (commas are dropped), then
# group them lazily. The pattern is a raw string so that "\(" and "\w" reach
# the regex engine verbatim instead of being read as invalid string escapes.
parsed = parse(iter(re.findall(r'\(|\)|\w+', line)))
def _eval(d):
_r = []
n = next(d, None)
while n is not None:
if n.isdigit():
_r.append(int(n))
else:
_r.append(f[n](*_eval(next(d))))
n = next(d, None)
return _r
print(_eval(parsed)[0])
输出:
46
听起来这可以用正则表达式解决。
所以这是一个单一归约的例子
import re, operator
def apply(match):
    """Evaluate one matched call and return its result as a string.

    ``match.group(1)`` is the function name (the text outside the
    parentheses) and ``match.group(2)`` the comma-separated integer
    arguments found between them.
    """
    name = match.group(1)
    numbers = tuple(int(part) for part in match.group(2).split(','))
    table = {"add": operator.add, "multiply": operator.mul}
    outcome = table[name](*numbers)
    return str(outcome)
def single_step(line):
    """Return ``line`` with every call that has no nested parentheses replaced by its value.

    Each such ``name(args)`` occurrence is passed to ``apply`` and replaced
    by the string it returns.
    """
    innermost_call = re.compile(r"([a-z]+)\(([^()]+)\)")
    return innermost_call.sub(apply, line)
例如:
line = "add(multiply(add(2,3),add(4,5)),1)"
print(single_step(line))
会输出:
add(multiply(5,9),1)
剩下要做的就是循环直到表达式是一个数字
# Reduce innermost calls until a pass changes nothing. Stopping at a fixed
# point (instead of testing line.isdigit()) also terminates when the final
# value is negative or the text cannot be reduced any further.
while True:
    reduced = single_step(line)
    if reduced == line:
        break
    line = reduced
print(line)
会显示
46
这是一个使用 pyparsing 的解决方案,因此更容易扩展:
from pyparsing import *
首先是一个方便的函数(使用第二个标记函数并打印解析树以查看原因)
def tag(name):
    """Build a parse action that labels its tokens with ``name``.

    The returned function calls ``tokens.asList()`` and returns the tuple
    ``(name, *items)``. The one exception is an ``'expr'`` wrapping a single
    item: there the bare list is returned, so ``["expr", 4]`` collapses to 4.
    """
    def tagfn(tokens):
        items = tokens.asList()
        if name != 'expr' or len(items) != 1:
            return (name, *items)
        # a lone item under 'expr' is an LL(1) artifact; drop the wrapper
        return items
    return tagfn
# def tag(name):
# return lambda tokens: tuple([name] + tokens.asList())
我们的词法分析器需要识别左右括号、整数和名称。这就是你用 pyparsing 定义它们的方式:
# Parentheses are matched but suppressed; they do not show up in the parse
# results printed further down.
LPAR = Suppress("(")
RPAR = Suppress(")")
# The parse action replaces the matched text with its int value.
integer = Word(nums).setParseAction(lambda s,l,t: [int(t[0])])
# A function name (presumably a run of letters - see pyparsing's `alphas`).
name = Word(alphas)
我们的语法包含函数调用,函数调用以零个或多个表达式作为参数。函数调用本身也是表达式,因此为了处理这种循环引用,我们必须对 expr 和 fncall 进行前向声明(Forward):
# expr and fncall refer to each other, so both start out as Forward
# placeholders and receive their definitions with "<<" below.
expr = Forward()
fncall = Forward()
# An expression is an integer or a function call; tag('expr') post-processes its tokens.
expr << (integer | fncall).setParseAction(tag('expr'))
# Argument list built from expressions (presumably comma-separated, the
# default delimiter of delimitedList - confirm against the pyparsing docs).
fnparams = delimitedList(expr)
# A name followed by a parenthesised, optional argument list; Group keeps the
# arguments together as one nested list in the result.
fncall << (name + Group(LPAR + Optional(fnparams, default=[]) + RPAR)).setParseAction(tag('fncall'))
现在我们可以解析我们的字符串(我们也可以向函数添加空格和多于或少于两个参数):
line = "add(multiply(add(2,3),add(4,5)),1)"
res = fncall.parseString(line)
要查看返回的内容,您可以打印它,这称为解析树(或者,由于我们的标记函数对其进行了简化,因此称为抽象语法树):
import pprint
pprint.pprint(list(res))
输出:
[('fncall',
'add',
[('fncall',
'multiply',
[('fncall', 'add', [2, 3]), ('fncall', 'add', [4, 5])]),
1])]
如果改用被注释掉的那个 tag 函数,输出如下(这只会增加后续处理的工作量,没有额外的好处):
[('fncall',
'add',
[('expr',
('fncall',
'multiply',
[('expr', ('fncall', 'add', [('expr', 2), ('expr', 3)])),
('expr', ('fncall', 'add', [('expr', 4), ('expr', 5)]))])),
('expr', 1)])]
现在定义我们程序可用的函数:
# reduce is a builtin only on Python 2; functools provides it on Python 3 as well.
from functools import reduce

# Functions callable from a parsed expression; each accepts any number of
# arguments and falls back to its identity value (0 or 1) when given none.
FUNCTIONS = {
    'add': lambda *args: sum(args, 0),
    'multiply': lambda *args: reduce(lambda a, b: a*b, args, 1),
}
# print FUNCTIONS['multiply'](1,2,3,4) # test that it works ;-)
我们的解析器现在写起来非常简单:
def parse(ast):
    """Evaluate a tagged parse tree and return its value.

    ``ast`` may be:
      * a ``('fncall', name, [args])`` tuple - the argument list is
        evaluated recursively and passed to ``FUNCTIONS[name]``;
      * a list - a list with every item evaluated is returned;
      * an int - returned unchanged.
    Anything else (None included) evaluates to None.

    Raises KeyError when ``name`` is not a key of ``FUNCTIONS``.
    """
    # Dispatch on type, not truthiness: 0 and [] are falsy, yet they are a
    # valid literal and a valid (empty) argument list respectively.
    if isinstance(ast, tuple) and ast and ast[0] == 'fncall':
        # ast is here ('fncall', <name-of-function>, [list-of-arguments])
        fn_name = ast[1]
        fn_args = parse(ast[2])  # handled by the list branch below
        return FUNCTIONS[fn_name](*fn_args)
    if isinstance(ast, list):
        # a parameter list: evaluate every parameter
        return [parse(item) for item in ast]
    if isinstance(ast, int):
        return ast
    return None
现在根据词法分析阶段的结果调用解析器:
>>> print(parse(res[0])) # the outermost item is an expression
46