使用 Haskell Parsec 解析算术表达式
Parsing arithmetic expression with Haskell Parsec
我正在编写一个算术解析器来处理像“1+2-3”这样的表达式。我使用 this blog post 作为参考。为了处理左结合性和优先级,我根据这个BNF(来自博客post)用Parsec写了一个解析器。
<exp> ::= <term> { ("+" | "-") <term> }
<term> ::= <factor> { ("*" | "/") <factor> }
<factor> ::= "(" <exp> ")" | <unary_op> <factor> | <int>
这是我的解析器代码。
parseExp :: Parser Exp
parseExp = do
t1 <- parseTerm
loop t1
where termSuffix t1 = do
op <- lexeme $ oneOf "+-"
t2 <- parseTerm
case op of
'+' -> termSuffix (Binary Plus t1 t2)
'-' -> termSuffix (Binary Minus t1 t2)
loop t = termSuffix t <|> return t
parseTerm :: Parser Exp
parseTerm = do
f1 <- parseFactor
loop f1
where factorSuffix f1 = do
op <- lexeme $ oneOf "*/"
f2 <- parseFactor
case op of
'*' -> factorSuffix (Binary Mul f1 f2)
'/' -> factorSuffix (Binary Div f1 f2)
loop t = factorSuffix t <|> return t
parseFactor :: Parser Exp
parseFactor = parseConst <|> parseParen <|> parseUnary
parseParen = do
void $ lexeme $ char '('
e <- parseExp
void $ lexeme $ char ')'
return e
parseUnary :: Parser Exp
parseUnary = do
op <- lexeme $ oneOf "!~-"
f <- parseFactor
case op of
'!' -> return $ Unary LogNeg f
'~' -> return $ Unary BitCompl f
'-' -> return $ Unary ArithNeg f
parseConst :: Parser Exp
parseConst = do
i <- many1 digit
return (Const $ read i)
我也参考了这个教程代码。 tutorial
simpleExpr7 :: Parser SimpleExpr
simpleExpr7 = do
-- first parse a term
e <- term7
-- then see if it is followed by an '+ expr' suffix
maybeAddSuffix e
where
-- this function takes an expression, and parses a
-- '+ expr' suffix, returning an Add expression
-- it recursively calls itself via the maybeAddSuffix function
addSuffix e0 = do
void $ lexeme $ char '+'
e1 <- term7
maybeAddSuffix (Add e0 e1)
-- this is the wrapper for addSuffix, which adapts it so that if
-- addSuffix fails, it returns just the original expression
maybeAddSuffix e = addSuffix e <|> return e
我的代码不起作用。这段代码是这样工作的。
*Main CodeGen Parser> parseWithEof parseExp "-2"
Right (Unary ArithNeg (Const 2))
*Main CodeGen Parser> parseWithEof parseExp "(2)"
Right (Const 2)
*Main CodeGen Parser> parseWithEof parseExp "-!(((2)))"
Right (Unary ArithNeg (Unary LogNeg (Const 2)))
*Main CodeGen Parser> parseWithEof parseExp "1+2"
Left (line 1, column 4):
unexpected end of input
expecting digit
*Main CodeGen Parser> parseWithEof parseExp "1+2+3"
Left (line 1, column 6):
unexpected end of input
expecting digit
*Main CodeGen Parser> parseWithEof parseExp "1+2*3"
Left (line 1, column 6):
unexpected end of input
expecting digit
我不明白为什么会这样 unexpected end of input
。
考虑解析 1+2
。在parseExp
中,这将1
解析为t1 = Const 1
,然后进入循环loop (Const 1)
。循环尝试第一个选择 termSuffix (Const 1)
,它成功地解析了运算符 +
,下一个术语 t2 = Const 2
,然后循环回到 termSuffix (Binary Plus (Const 1) (Const 2))
,它期望 +
或 -
。解析失败。不要循环回 termSuffix
,您应该循环回 loop
以在第一个 +
:
之后允许一个词项
parseExp :: Parser Exp
parseExp = do
t1 <- parseTerm
loop t1
where termSuffix t1 = do
op <- lexeme $ oneOf "+-"
t2 <- parseTerm
case op of
-- *** use `loop` here, not `termSuffix` ***
'+' -> loop (Binary Plus t1 t2)
'-' -> loop (Binary Minus t1 t2)
loop t = termSuffix t <|> return t
对 parseTerm
进行类似的更改后,您的测试用例都可以正常工作。
我正在编写一个算术解析器来处理像“1+2-3”这样的表达式。我使用 this blog post 作为参考。为了处理左结合性和优先级,我根据这个BNF(来自博客post)用Parsec写了一个解析器。
<exp> ::= <term> { ("+" | "-") <term> }
<term> ::= <factor> { ("*" | "/") <factor> }
<factor> ::= "(" <exp> ")" | <unary_op> <factor> | <int>
这是我的解析器代码。
parseExp :: Parser Exp
parseExp = do
t1 <- parseTerm
loop t1
where termSuffix t1 = do
op <- lexeme $ oneOf "+-"
t2 <- parseTerm
case op of
'+' -> termSuffix (Binary Plus t1 t2)
'-' -> termSuffix (Binary Minus t1 t2)
loop t = termSuffix t <|> return t
parseTerm :: Parser Exp
parseTerm = do
f1 <- parseFactor
loop f1
where factorSuffix f1 = do
op <- lexeme $ oneOf "*/"
f2 <- parseFactor
case op of
'*' -> factorSuffix (Binary Mul f1 f2)
'/' -> factorSuffix (Binary Div f1 f2)
loop t = factorSuffix t <|> return t
parseFactor :: Parser Exp
parseFactor = parseConst <|> parseParen <|> parseUnary
parseParen = do
void $ lexeme $ char '('
e <- parseExp
void $ lexeme $ char ')'
return e
parseUnary :: Parser Exp
parseUnary = do
op <- lexeme $ oneOf "!~-"
f <- parseFactor
case op of
'!' -> return $ Unary LogNeg f
'~' -> return $ Unary BitCompl f
'-' -> return $ Unary ArithNeg f
parseConst :: Parser Exp
parseConst = do
i <- many1 digit
return (Const $ read i)
我也参考了这个教程代码。 tutorial
simpleExpr7 :: Parser SimpleExpr
simpleExpr7 = do
-- first parse a term
e <- term7
-- then see if it is followed by an '+ expr' suffix
maybeAddSuffix e
where
-- this function takes an expression, and parses a
-- '+ expr' suffix, returning an Add expression
-- it recursively calls itself via the maybeAddSuffix function
addSuffix e0 = do
void $ lexeme $ char '+'
e1 <- term7
maybeAddSuffix (Add e0 e1)
-- this is the wrapper for addSuffix, which adapts it so that if
-- addSuffix fails, it returns just the original expression
maybeAddSuffix e = addSuffix e <|> return e
我的代码不起作用。这段代码是这样工作的。
*Main CodeGen Parser> parseWithEof parseExp "-2"
Right (Unary ArithNeg (Const 2))
*Main CodeGen Parser> parseWithEof parseExp "(2)"
Right (Const 2)
*Main CodeGen Parser> parseWithEof parseExp "-!(((2)))"
Right (Unary ArithNeg (Unary LogNeg (Const 2)))
*Main CodeGen Parser> parseWithEof parseExp "1+2"
Left (line 1, column 4):
unexpected end of input
expecting digit
*Main CodeGen Parser> parseWithEof parseExp "1+2+3"
Left (line 1, column 6):
unexpected end of input
expecting digit
*Main CodeGen Parser> parseWithEof parseExp "1+2*3"
Left (line 1, column 6):
unexpected end of input
expecting digit
我不明白为什么会这样 unexpected end of input
。
考虑解析 1+2
。在parseExp
中,这将1
解析为t1 = Const 1
,然后进入循环loop (Const 1)
。循环尝试第一个选择 termSuffix (Const 1)
,它成功地解析了运算符 +
,下一个术语 t2 = Const 2
,然后循环回到 termSuffix (Binary Plus (Const 1) (Const 2))
,它期望 +
或 -
。解析失败。不要循环回 termSuffix
,您应该循环回 loop
以在第一个 +
:
parseExp :: Parser Exp
parseExp = do
t1 <- parseTerm
loop t1
where termSuffix t1 = do
op <- lexeme $ oneOf "+-"
t2 <- parseTerm
case op of
-- *** use `loop` here, not `termSuffix` ***
'+' -> loop (Binary Plus t1 t2)
'-' -> loop (Binary Minus t1 t2)
loop t = termSuffix t <|> return t
对 parseTerm
进行类似的更改后,您的测试用例都可以正常工作。