创建一个对第一个解析器的缩进敏感的 sepBy 解析器组合器

Create a sepBy parser combinator sensitive to the indentation of the first parser

我使用 FParsec 以及这个库(其代码非常短,已复制到问题末尾),正在尝试设计一个类似于 sepBy 的解析器,它对作为参数传入的第一个解析器的缩进敏感。例如,如果我写 indentSepBy (pstring "Example") (pchar '.'),我希望下面这样的输入是可以接受的:

Example
  .Example
  .Example.Example
  .Example.Example.Example
  .Example

但不是这个:

Example
.Example
.Example.Example
.Example

也就是说,第一个解析器所在的位置决定了其余部分所需的缩进。为此,我尝试在 FParsec 默认的 sepBy 解析器之上结合缩进库做一个简单的改写,得到了:

open FParsec
open IndentParsec

// Records the position where parsing starts, then runs sepBy with both the
// element parser and the separator parser wrapped in `greater`, so every
// element (including the first one) and every separator is required to sit in
// a column strictly greater than that starting column.
let indentSepBy p sep =
    getPosition
    >>= fun start -> sepBy (greater start p) (greater start sep)

// Parser under test: "Example" items separated by '.'.
let test = indentSepBy (pstring "Example") (pchar '.')
let text = "Example.Example" (* simple single-line input to start with *)

应用它,我收到以下错误消息(来自 FParsec):

"Error in Ln: 1 Col: 16
Example.Example
               ^
Note: The error occurred at the end of the input stream.
Expecting: '.'
"

如果我删除与缩进相关的解析器,

// Control experiment: the `greater` wrappers are dropped, so the result is
// whatever plain sepBy produces.
let indentSepBy p sep = (* trivially equivalent to the plain sepBy parser *)
    parse {
        let! pos = getPosition // still read, but no longer used
        return! sepBy p sep
    }

问题不再出现,结果如我们所料。因此,我不明白是哪个部分导致了这个错误。这看起来很可能是缩进库中的问题,但我无法弄清楚……下面是相关的库,我已将其精简到只剩要点:

open FParsec

/// Indentation-sensitive parsing helpers built on top of FParsec.
/// The current indentation constraint is stored in the parser's user state.
module IndentParser =
    /// The indentation constraint that a token position is checked against.
    type Indentation =
        | Fail
        | Any
        | Greater of Position
        | Exact of Position
        | AtLeast of Position
        | StartIndent of Position
        /// The reference position carried by the constraint, or None for
        /// the cases that carry no position (Any and Fail).
        member this.Position =
            match this with
            | Any
            | Fail -> None
            | Greater p -> Some p
            | Exact p -> Some p
            | AtLeast p -> Some p
            | StartIndent p -> Some p

    /// User state wrapper: the active indentation constraint plus the
    /// caller's own user state.
    type IndentState<'T> = { Indent: Indentation; UserState: 'T }

    /// An FParsec parser whose user state is an IndentState.
    type IndentParser<'T, 'UserState> = Parser<'T, IndentState<'UserState>>

    /// Initial state: no indentation constraint (Any) around user state u.
    let indentState u = { Indent = Any; UserState = u }

    /// Runs p on the string s, starting from indentState u and an empty
    /// stream name.
    let runParser p u s = runParserOnString p (indentState u) "" s

    /// Runs p on the file at path, read as UTF-8, starting from indentState u.
    let runParserOnFile p u path =
        runParserOnFile p (indentState u) path System.Text.Encoding.UTF8

    /// Returns the indentation constraint currently stored in the user state.
    /// Always succeeds and does not touch the stream.
    let getIndentation: IndentParser<_, _> =
        fun stream -> Reply(stream.UserState.Indent)

    /// Replaces the stored indentation constraint with newi, keeping the
    /// rest of the user state. Always succeeds.
    let putIndentation newi: IndentParser<unit, _> =
        fun stream ->
            stream.UserState <- { stream.UserState with Indent = newi }
            Reply(Unchecked.defaultof<unit>)

    /// Builds a parser that always fails with a message produced by sprintf.
    /// Because it is written as a composition, the format is expected to take
    /// exactly one argument.
    let failf fmt = fail << sprintf fmt

    /// True when a token starting at pos satisfies the constraint i.
    /// Only the column of the positions is compared.
    let acceptable i (pos: Position) =
        match i with
        // `Any` carries no data, so it is matched without a discard pattern
        // (newer F# compilers warn about `Any _`).
        | Any -> true
        | Fail -> false
        | Greater bp -> bp.Column < pos.Column
        | Exact ep -> ep.Column = pos.Column
        | AtLeast ap -> ap.Column <= pos.Column
        | StartIndent _ -> true

    /// Column compatibility check between two constraints (presumably i is
    /// the inner and o the outer constraint - confirm against callers).
    /// Combinations that are not listed are always considered compatible.
    let nestableIn i o =
        match i, o with
        | Greater i, Greater o -> o.Column < i.Column
        | Greater i, Exact o -> o.Column < i.Column
        | Exact i, Exact o -> o.Column = i.Column
        | Exact i, Greater o -> o.Column <= i.Column
        | _, _ -> true

    /// Runs p only if the current stream position is acceptable under the
    /// stored indentation constraint; otherwise fails with an
    /// "incorrect indentation" message.
    let tokeniser p =
        parse {
            let! pos = getPosition
            let! i = getIndentation

            if acceptable i pos
            then return! p
            else return! failf "incorrect indentation at %A" pos
        }

    /// Runs p under the constraint i, then requires that no further
    /// acceptably indented character follows (reported as "unterminated ..."),
    /// and finally sets the constraint to o. Returns the result of p.
    let nestP i o p =
        parse {
            do! putIndentation i
            let! x = p

            do! notFollowedBy (tokeniser anyChar)
                <?> (sprintf "unterminated %A" i)

            do! putIndentation o

            return x
        }

    /// Sets the constraint to i, skips whitespace, then runs p through
    /// tokeniser so that the indentation check happens at the first
    /// non-whitespace position.
    /// NOTE: the state update and the whitespace skipping are not undone here
    /// when the check or p fails; wrap the result in `attempt` when
    /// backtracking is needed.
    let indented<'a, 'u> i (p: Parser<'a, _>): IndentParser<_, 'u> =
        parse {
            do! putIndentation i
            do! spaces
            return! tokeniser p
        }

    /// p must start at exactly the column of pos.
    let exact<'a, 'u> pos p: IndentParser<'a, 'u> = indented (Exact pos) p
    /// p must start at a column strictly greater than that of pos.
    let greater<'a, 'u> pos p: IndentParser<'a, 'u> = indented (Greater pos) p
    /// p must start at a column greater than or equal to that of pos.
    let atLeast<'a, 'u> pos p: IndentParser<'a, 'u> = indented (AtLeast pos) p
    /// p may start at any column; pos is accepted but ignored.
    let any<'a, 'u> pos p: IndentParser<'a, 'u> = indented Any p

我认为这里有两个问题:

  1. 您要求第一个 "Example" 的缩进超出其本身,这是不可能的。无论当前位置如何,您都应该让第一个解析器成功。
  2. greater 不是原子的,所以当它失败时,您的解析器将处于无效状态。这可能会或可能不会被视为库中的错误。无论如何,您都可以通过 attempt 使其成为原子的。

考虑到这一点,我认为以下解析器大致可以满足您的要求:

// Parses one `p` at the current position, followed by zero or more
// `sep p` pairs. Every separator and every later element must start in a
// column strictly greater than the column where the first element started.
// Returns the parsed elements as a list (at least one element is required).
let indentSepBy p sep =
    parse {
        let! start = getPosition
        // The first element is parsed without any indentation check.
        let! first = p
        // `attempt` makes each indentation-checked parser backtrack on failure.
        let indentedItem = attempt (greater start p)
        let indentedSep = attempt (greater start sep)
        let! rest = many (indentedSep >>. indentedItem)
        return first :: rest
    }

您可以按如下方式进行测试:

// Parser under test: "Example" items separated by '.', combined through
// indentSepBy.
let test =
    indentSepBy (pstring "Example") (pchar '.')

// Prints a "***" separator line, then runs `test` on `text` (requiring that
// the whole input is consumed via `eof`) and prints the parse result.
let run text =
    printfn "***"
    let outcome = runParser (test .>> eof) () text
    printfn "%A" outcome

// Entry point: runs the sample inputs in order and returns exit code 0.
[<EntryPoint>]
let main argv =
    [ "Example.Example"      // success
      "Example\n.Example"    // failure
      "Example\n .Example" ] // success
    |> List.iter run
    0

请注意,我已强制 test 解析器通过 eof 消耗整个输入。否则,当它实际上无法解析完整字符串时,它会错误地报告成功。