如何使用 Parsec 解析简单的命令式语言?

How to parse simple imperative language using Parsec?

我有一个简单的语言,语法如下

Expr -> Var | Int | Expr Op Expr  
Op -> + | - | * | / | % | == | != | < | > | <= | >= | && | ||  
Stmt -> Skip | Var := Expr | Stmt ; Stmt | write Expr | read Expr | while Expr do Stmt | if Expr then Stmt else Stmt

我正在使用 Haskell 的 Parsec 库为这种语言编写简单的解析器,但我遇到了一些问题

当我尝试解析语句 skip ; skip 时,我只得到第一个 Skip,但是我想去得到类似 Colon Skip Skip

的东西

另外,当我尝试解析作业时,我得到了无限递归。例如,当我尝试解析 x := 1 时,我的电脑挂了很长时间。

这是我的解析器的完整源代码。感谢您的帮助!

module Parser where


import Control.Monad
import Text.Parsec.Language
import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Parsec.Expr
import Text.ParserCombinators.Parsec.Language
import qualified Text.ParserCombinators.Parsec.Token as Token

type Id = String

data Op = Add
        | Sub
        | Mul
        | Div
        | Mod
        | Eq
        | Neq
        | Gt
        | Geq
        | Lt
        | Leq
        | And
        | Or deriving (Eq, Show)

data Expr = Var Id
          | Num Integer
          | BinOp Op Expr Expr deriving (Eq, Show)

data Stmt = Skip
          | Assign Expr Expr
          | Colon Stmt Stmt
          | Write Expr
          | Read Expr
          | WhileLoop Expr Stmt
          | IfCond Expr Stmt Stmt deriving (Eq, Show)

languageDef =
     emptyDef    { Token.commentStart    = ""
                 , Token.commentEnd      = ""
                 , Token.commentLine     = ""
                 , Token.nestedComments  = False
                 , Token.caseSensitive   = True
                 , Token.identStart      = letter
                 , Token.identLetter     = alphaNum
                 , Token.reservedNames   = [ "skip"
                                           , ";"
                                           , "write"
                                           , "read"
                                           , "while"
                                           , "do"
                                           , "if"
                                           , "then"
                                           , "else"
                                           ]
                 , Token.reservedOpNames = [ "+"
                                           , "-"
                                           , "*"
                                           , "/"
                                           , ":="
                                           , "%"
                                           , "=="
                                           , "!="
                                           , ">"
                                           , ">="
                                           , "<"
                                           , "<="
                                           , "&&"
                                           , "||"
                                           ]
                }

lexer = Token.makeTokenParser languageDef

identifier = Token.identifier lexer
reserved   = Token.reserved   lexer
reservedOp = Token.reservedOp lexer
semi       = Token.semi       lexer
parens     = Token.parens     lexer
integer    = Token.integer    lexer
whiteSpace = Token.whiteSpace lexer

ifStmt :: Parser Stmt
ifStmt = do
    reserved "if"
    cond <- expression
    reserved "then"
    action1 <- statement
    reserved "else"
    action2 <- statement
    return $ IfCond cond action1 action2

whileStmt :: Parser Stmt
whileStmt = do
    reserved "while"
    cond <- expression
    reserved "do"
    action <- statement
    return $ WhileLoop cond action

assignStmt :: Parser Stmt
assignStmt = do
    var <- expression
    reservedOp ":="
    expr <- expression
    return $ Assign var expr

skipStmt :: Parser Stmt
skipStmt = do
    reserved "skip"
    return Skip

colonStmt :: Parser Stmt
colonStmt = do
    s1 <- statement
    reserved ";"
    s2 <- statement
    return $ Colon s1 s2

readStmt :: Parser Stmt
readStmt = do
    reserved "read"
    e <- expression
    return $ Read e

writeStmt :: Parser Stmt
writeStmt = do
    reserved "write"
    e <- expression
    return $ Write e

statement :: Parser Stmt
statement = colonStmt
            <|> assignStmt
            <|> writeStmt
            <|> readStmt
            <|> whileStmt
            <|> ifStmt
            <|> skipStmt

expression :: Parser Expr
expression = buildExpressionParser operators term

term = fmap Var identifier
        <|> fmap Num integer
        <|> parens expression

operators = [ [Infix (reservedOp "==" >> return (BinOp Eq)) AssocNone,
              Infix (reservedOp "!=" >> return (BinOp Neq)) AssocNone,
              Infix (reservedOp ">"  >> return (BinOp Gt)) AssocNone,
              Infix (reservedOp ">=" >> return (BinOp Geq)) AssocNone,
              Infix (reservedOp "<"  >> return (BinOp Lt)) AssocNone,
              Infix (reservedOp "<=" >> return (BinOp Leq)) AssocNone,
              Infix (reservedOp "&&" >> return (BinOp And)) AssocNone,
              Infix (reservedOp "||" >> return (BinOp Or)) AssocNone]

            , [Infix (reservedOp "*"  >> return (BinOp Mul)) AssocLeft,
              Infix (reservedOp "/"  >> return (BinOp Div)) AssocLeft,
              Infix (reservedOp "%"  >> return (BinOp Mod)) AssocLeft]

            , [Infix (reservedOp "+"  >> return (BinOp Add)) AssocLeft,
               Infix (reservedOp "-"  >> return (BinOp Sub)) AssocLeft]
            ]

parser :: Parser Stmt
parser = whiteSpace >> statement

parseString :: String -> Stmt
parseString str =
    case parse parser "" str of
        Left e -> error $ show e
        Right r -> r`

这是基于解析器组合器的解析器的常见问题:statement 是左递归的,因为它的第一个模式是 colonStmtcolonStmt 会做的第一件事就是尝试解析再次 statement。众所周知,解析器组合器不会在这种情况下终止。

statement 解析器中删除了 colonStmt 模式,其他部分正常工作:

> parseString "if (1 == 1) then skip else skip"
< IfCond (BinOp Eq (Num 1) (Num 1)) Skip Skip
> parseString "x := 1"
< Assign (Var "x") (Num 1)

解决方案在this repo中有完整描述,没有license文件所以我真的不知道参考代码是否安全,一般的想法是在解析任何内容时添加另一层解析器声明:

statement :: Parser Stmt
statement = do
    ss <- sepBy1 statement' (reserved ";")
    if length ss == 1
        then return $ head ss
        else return $ foldr1 Colon ss

statement' :: Parser Stmt
statement' = assignStmt
            <|> writeStmt
            <|> readStmt
            <|> whileStmt
            <|> ifStmt
            <|> skipStmt