FParsec 的缩进、表达式、语句和 StackOverflowException - 错误

Indentation, expressions, statements and StackOverflowException with FParsec - Errors

根据 this implementation,我使用 FParsec 测试缩进,但是当我通过添加表达式(文字、列表、元组和算术运算)使其变得更复杂时,允许表达式到顶层,并且添加变量创建语句;我首先收到 WhosebugException 错误。在我看来,这是因为表达式解析器以在程序中形成无限循环的方式请求。我看不出其他原因,但是,我不知道如何解决这个问题。

如果我从我的解析器数据 statement 中删除 attempt pexpression,则不再有 WhosebugException,但是模块 IndentationParserWithoutBacktracking(因此管理缩进)告诉我要解析的代码缺少 "newline":

Failure: Error in Ln: 2 Col: 1
loop i 0 10
^
Expecting: let or print

The parser backtracked after:
  Error in Ln: 3 Col: 5
      let myVar = 2 + 1
      ^
  Expecting: loop or print

  The parser backtracked after:
    Error in Ln: 3 Col: 17
        let myVar = 2 + 1
                    ^
    Expecting: newline

这一切都根据下面的文字来解析:

loop i 0 10
    let myVar = 2 + 1
    print myVar

这是我的代码:

open FParsec

// module IndentationParserWithoutBacktracking // see the link

// Utils

open IndentationParserWithoutBacktracking

let isBlank = fun c -> c = ' ' || c = '\t'
let ws  = spaces
let ws1 = skipMany1SatisfyL isBlank "whitespace"
let str s = pstring s .>> ws

let keyword str = pstring str >>? nextCharSatisfiesNot (fun c -> isLetter c || isDigit c) <?> str

// AST

type Identifier = Identifier of string

type InfixOp = 
    | Sum | Sub | Mul | Div | Pow | Mod
    | And | Or | Equal | NotEqual | Greater | Smaller | GreaterEqual | SmallerEqual

type Value =
    | Int of int
    | Float of float
    | Bool of bool
    | String of string
    | Char of char
    | Variable of Identifier

type Expr =
    | Literal of Value
    | Infix of Expr * InfixOp * Expr
    | List of Expr list
    | Tuple of Expr list

type Statement =
    | Expression of Expr
    | Let of Identifier * Statement list
    | Loop of Identifier * Expr * Expr * Statement list
    | Print of Identifier

// Literals

let numberFormat = NumberLiteralOptions.AllowMinusSign   ||| NumberLiteralOptions.AllowFraction |||
                   NumberLiteralOptions.AllowHexadecimal ||| NumberLiteralOptions.AllowOctal    |||
                   NumberLiteralOptions.AllowBinary      ||| NumberLiteralOptions.AllowPlusSign

let literal_numeric =
    numberLiteral numberFormat "number" |>> fun nl ->
        if nl.IsInteger then Literal (Int(int nl.String))
        else Literal (Float(float nl.String))

let literal_bool = 
    (choice [
        (stringReturn "true" (Literal (Bool true)))
        (stringReturn "false" (Literal (Bool false)))
    ]
    .>> ws) <?> "boolean"

let literal_string = 
    (between (pstring "\"") (pstring "\"") (manyChars (satisfy (fun c -> c <> '"')))
    |>> fun s -> Literal (String s)) <?> "string"

let literal_char = 
    (between (pstring "'") (pstring "'") (satisfy (fun c -> c <> '''))
    |>> fun c -> Literal (Char c)) <?> "character"

let identifier =
    (many1Satisfy2L isLetter (fun c -> isLetter c || isDigit c) "identifier"
    |>> fun i -> Identifier i) <?> "valid identifier"

let betweenParentheses p =
    (between (str "(") (str ")") p)

let variable = identifier |>> fun id -> Literal (Variable id)

let literal = (attempt literal_numeric  <|>
               attempt literal_bool     <|>
               attempt literal_char     <|>
               attempt literal_string   <|>
               attempt variable)        <?> "literal"

// Expressions

let pexpr, pexprimpl = createParserForwardedToRef()

let term =
    (ws >>. literal .>> ws) <|>
    (betweenParentheses (ws >>. pexpr)) <|>
    (ws >>. pexpr .>> ws)

let infixOperator (p: OperatorPrecedenceParser<_, _, _>) op prec map =
    p.AddOperator(InfixOperator(op, ws, prec, Associativity.Left, map))

let ops =
    // Arithmetic
    [ "+"; "-"; "*"; "/"; "%" ] @
    // Logical
    [ "&&"; "||"; "=="; "!="; ">"; "<"; ">="; "<=" ]

let opCorrespondance op =
    match op with
    // Arithmetic operators
    | "+"  -> Sum
    | "-"  -> Sub
    | "*"  -> Mul
    | "/"  -> Div
    | "%"  -> Mod
    // Logical operators
    | "&&" -> And
    | "||" -> Or
    | "==" -> Equal
    | "!=" -> NotEqual
    | ">"  -> Greater
    | "<"  -> Smaller
    | ">=" -> GreaterEqual
    | "<=" -> SmallerEqual

let opParser = new OperatorPrecedenceParser<_, _, _>()

for op in ops do
    infixOperator opParser op 1 (fun x y -> Infix(x, opCorrespondance op, y))

opParser.TermParser <- term

let list = between (str "[") (str "]") (sepBy pexpr (str ",")) |>> List

let tuple = between (str "(") (str ")") (sepBy pexpr (str ",")) |>> Tuple

let expression =
    opParser.ExpressionParser   <|> // I removed this line to don't have the mistake again.
    list                        <|>
    tuple                       <|>
    literal

pexprimpl := attempt expression

// Statements

let statements, statementsRef = createParserForwardedToRef()

let pexpression = expression |>> Expression

let plet =
    pipe2
        (keyword "let" >>. ws1 >>. identifier)
        (ws >>. str "=" >>. ws >>. statements)
        (fun id gtt exp -> Let(id, gtt, exp))

// From the link, but "revisited"
let ploop =
    pipe4
        (keyword "loop" >>. ws1 >>. identifier)
        (ws1 >>. literal) // If I put 'pexpr', it doesn't work too...
        (ws1 >>. literal)
        (statements)
        (fun id min max stmts -> Loop(id, min, max, stmts))

let print = keyword "print" >>. (ws1 >>. identifier |>> Print)

let statement =
    attempt plet        <|>
    attempt print       <|>
    attempt ploop       <|>
    attempt pexpression

statementsRef := indentedMany1 statement "statement"

let document = statements .>> spaces .>> eof

let test str =
    match runParserOnString document (UserState.Create()) "" str with
        | Success(result, _, _)   -> printfn "Success: %A" result
        | Failure(errorMsg, _, _) -> printfn "Failure: %s" errorMsg

System.Console.Clear()

test @"
loop i 0 10
    let myVar = 2 + 1
    print myVar
"

我知道我同时问了好几个问题,而且网站确实不允许,但它们都有点联系在一起,所以我还不如一石二鸟...

我真的很想了解我的错误,以便为非常小的 ML-like 语言设计一个解析器。

谢谢。

编辑

这是我当前的代码,已修改以响应遇到的第一个缩进问题:

open IndentationParserWithoutBacktracking // So from the link

let isBlank = fun c -> c = ' ' || c = '\t'
let ws  = spaces
let ws1 = skipMany1SatisfyL isBlank "whitespace"
let str s = pstring s .>> ws

let keyword str = pstring str >>? nextCharSatisfiesNot (fun c -> isLetter c || isDigit c) <?> str

// AST

type Identifier = Identifier of string

type Value =
    | Int of int
    | Float of float
    | Bool of bool
    | String of string
    | Char of char
    | Variable of Identifier

// In FP, "all" is an expression, so:

type Expr =
    // Arithmetic + lists and tuple
    | Literal of Value
    | Infix of Expr * InfixOp * Expr
    | List of Expr list
    | Tuple of Expr list
    // Statements
    | Return of Expr
    | Loop of Identifier * Expr * Expr * Expr list
    | Print of Identifier
and InfixOp = 
    | Sum | Sub | Mul | Div | Pow | Mod
    | And | Or | Equal | NotEqual | Greater | Smaller | GreaterEqual | SmallerEqual

// Literals

let numberFormat = NumberLiteralOptions.AllowMinusSign   ||| NumberLiteralOptions.AllowFraction |||
                   NumberLiteralOptions.AllowHexadecimal ||| NumberLiteralOptions.AllowOctal    |||
                   NumberLiteralOptions.AllowBinary

let literal_numeric =
    numberLiteral numberFormat "number" |>> fun nl ->
        if nl.IsInteger then Literal (Int(int nl.String))
        else Literal (Float(float nl.String))

let literal_bool = 
    (choice [
        (stringReturn "true" (Literal (Bool true)))
        (stringReturn "false" (Literal (Bool false)))
    ]
    .>> ws) <?> "boolean"

let literal_string = 
    (between (pstring "\"") (pstring "\"") (manyChars (satisfy (fun c -> c <> '"')))
    |>> fun s -> Literal (String s)) <?> "string"

let literal_char = 
    (between (pstring "'") (pstring "'") (satisfy (fun c -> c <> '''))
    |>> fun c -> Literal (Char c)) <?> "character"

let identifier =
    (many1Satisfy2L isLetter (fun c -> isLetter c || isDigit c) "identifier"
    |>> fun i -> Identifier i) <?> "identifier"

let betweenParentheses p =
    (between (str "(") (str ")") p) <?> ""

let variable = identifier |>> fun id -> Literal (Variable id)

let literal = (attempt literal_numeric  <|>
               attempt literal_bool     <|>
               attempt literal_char     <|>
               attempt literal_string   <|>
               attempt variable)        <?> "literal"

// Expressions and statements

let pexprs, pexprimpl = createParserForwardedToRef()

// `ploop` is located here to force `pexprs` to be of the type `Expr list`, `ploop` requesting an expression list.
let ploop =
    pipe4
        (keyword "loop" >>. ws1 >>. identifier)
        (ws1 >>. literal)
        (ws1 >>. literal)
        (pexprs)
        (fun id min max stmts -> Loop(id, min, max, stmts))

// `singlepexpr` allows to use only one expression.
let singlepexpr =
    pexprs |>> fun ex -> ex.Head

let term =
    (ws >>. singlepexpr .>> ws) <|>
    (betweenParentheses (ws >>. singlepexpr)) <|>
    (ws >>. literal .>> ws) <|>
    (betweenParentheses (ws >>. literal))

let infixOperator (p: OperatorPrecedenceParser<_, _, _>) op prec map =
    p.AddOperator(InfixOperator(op, ws, prec, Associativity.Left, map))

let ops =
    // Arithmetic
    [ "+"; "-"; "*"; "/"; "%" ] @
    // Logical
    [ "&&"; "||"; "=="; "!="; ">"; "<"; ">="; "<=" ]

let opCorrespondance op =
    match op with
    // Arithmetic operators
    | "+"  -> Sum
    | "-"  -> Sub
    | "*"  -> Mul
    | "/"  -> Div
    | "%"  -> Mod
    // Logical operators
    | "&&" -> And
    | "||" -> Or
    | "==" -> Equal
    | "!=" -> NotEqual
    | ">"  -> Greater
    | "<"  -> Smaller
    | ">=" -> GreaterEqual
    | "<=" -> SmallerEqual

let opParser = new OperatorPrecedenceParser<Expr, unit, UserState>()

for op in ops do
    infixOperator opParser op 1 (fun x y -> Infix(x, opCorrespondance op, y))

opParser.TermParser <- term

let list = (between (str "[") (str "]") (sepBy singlepexpr (str ",")) |>> List) <?> "list"

let tuple = (between (str "(") (str ")") (sepBy singlepexpr (str ",")) |>> Tuple) <?> "tuple"

// Statements

// A commented `let` expression, commented for tests with the `return` instruction.

//let plet =
//    pipe3
//        (keyword "let" >>. ws1 >>. identifier)
//        (ws >>. gtt ":")
//        (ws >>. str "=" >>. ws >>. pexprs)
//        (fun id gtt exp -> Let(id, gtt, exp))

let preturn = 
    keyword "return" >>. ws >>. singlepexpr
    |>> fun ex -> Return ex

let print = keyword "print" >>. (ws1 >>. identifier |>> Print)

let instruction =
    print <|>
    ploop <|>
    preturn <|>

    opParser.ExpressionParser <|> // So we add the arithmetic, like x + y or 21 * 32 - 12 for example
    list <|>
    tuple <|>
    literal

pexprimpl := indentedMany1 instruction "instruction"

let document = pexprs .>> spaces .>> eof

let test str =
    match runParserOnString document (UserState.Create()) "" str with
        | Success(result, _, _)   -> printfn "%A" result
        | Failure(errorMsg, _, _) -> printfn "%s" errorMsg

System.Console.Clear()

// The test code that give an error of "newline" expecting
let code = test @"
return 2 + 1
"

这里有一些关于错误的截图:

为什么 indentedMany1 告诉你它在你的示例代码中期待一个换行符是因为它正在寻找:一个缩进的 。不是一行中的一个表达式。所以你的 let myVar = 2 + 1 行混淆了它。如果你写成:

let myVar =
    2 + 1

那我打赌它会起作用。

我相信,您需要的是更改 let 解析器以允许以下两种情况之一:或者 单行表达式, 语句块(您的 statements 解析器)。即,类似于:

let pLetValue = expression <|> statements
let plet =
    pipe2
        (keyword "let" >>. ws1 >>. identifier)
        (ws >>. str "=" >>. ws >>. pLetValue)
        (fun id gtt exp -> Let(id, gtt, exp))

请注意,我还没有对此进行测试,因为我今天没有太多时间。您可能需要 attempt expression(或 pexpr,两者是一回事),而不是上面的 expression。稍微试验一下,看看会发生什么;如果您在尝试弄清楚 FParsec 如何处理给定表达式时完全迷失了方向,请记住 http://www.quanttec.com/fparsec/users-guide/debugging-a-parser.html.

中给出的建议