创建一个对第一个解析器的缩进敏感的 sepBy 解析器组合器
Create a sepBy parser combinator sensitive to the indentation of the first parser
借助 FParsec 和这个库(其代码非常短,已复制到问题末尾),我正在尝试设计一个类似于 sepBy 的解析器,它对作为参数传入的第一个解析器的缩进敏感。例如,给定 indentSepBy (pstring "Example") (pchar '.'),我希望下面这样的输入可以被接受:
Example
  .Example
  .Example.Example
      .Example.Example.Example
   .Example
但下面这样的输入不应被接受(最后一行没有比第一个 Example 缩进得更深):
Example
  .Example
 .Example.Example
.Example
也就是说,第一个元素(即第一个解析器)的起始位置决定了其余部分所需的缩进。为此,我尝试用缩进库中的组合器对 FParsec 自带的 sepBy 做一个简单的包装,得到了:
open FParsec
open IndentParsec
/// sepBy variant whose elements and separators are all wrapped in `greater`,
/// using the position at which the list starts as the reference position.
/// Note that the very first element is wrapped in `greater` as well.
let indentSepBy p sep =
    parse {
        let! start = getPosition
        let item = greater start p
        let separator = greater start sep
        return! sepBy item separator
    }
// Parser under test: "Example" items separated by '.'.
let test = (pstring "Example", pchar '.') ||> indentSepBy

// Simple for start
let text = "Example.Example"
应用它,我收到以下错误消息(来自 FParsec):
"Error in Ln: 1 Col: 16
Example.Example
               ^
Note: The error occurred at the end of the input stream.
Expecting: '.'
"
如果我删除与缩进相关的解析器,
/// Variant with the indentation combinators removed, so it is just trivially
/// equivalent to the sepBy parser.
let indentSepBy p sep =
    parse {
        // The position is still read, but it is no longer used.
        let! _ = getPosition
        return! sepBy p sep
    }
问题不再出现,结果如我们所料。因此,我不明白是哪个参数导致了这个错误。这很可能是缩进库中的问题,但我无法弄清楚……下面是所涉及的库,我已将其精简到只剩要点:
open FParsec
/// Indentation-aware layer over FParsec: the active indentation rule is stored
/// in the parser's user state and checked before a token is parsed.
module IndentParser =
    /// Rule that the column of the next token is checked against.
    type Indentation =
        | Fail
        | Any
        | Greater of Position
        | Exact of Position
        | AtLeast of Position
        | StartIndent of Position

        /// Reference position carried by the rule; None for Any and Fail.
        member this.Position =
            match this with
            | Any
            | Fail -> None
            | Greater p -> Some p
            | Exact p -> Some p
            | AtLeast p -> Some p
            | StartIndent p -> Some p

    /// User state: the active indentation rule plus the caller's own state.
    type IndentState<'T> = { Indent: Indentation; UserState: 'T }

    /// An FParsec parser whose user state is an IndentState.
    type IndentParser<'T, 'UserState> = Parser<'T, IndentState<'UserState>>

    /// Initial state: no indentation constraint (Any) around user state u.
    let indentState u = { Indent = Any; UserState = u }

    /// Runs p on string s (with an empty stream name), starting from indentState u.
    let runParser p u s = runParserOnString p (indentState u) "" s

    /// Runs p on the file at path, read as UTF-8, starting from indentState u.
    /// The right-hand side refers to FParsec's runParserOnFile (non-recursive let).
    let runParserOnFile p u path =
        runParserOnFile p (indentState u) path System.Text.Encoding.UTF8

    /// Returns the indentation rule currently stored in the user state.
    let getIndentation: IndentParser<_, _> =
        fun stream ->
            match stream.UserState with
            | { Indent = i } -> Reply i

    /// Replaces the stored indentation rule with newi; the rest of the user
    /// state is kept.
    let putIndentation newi: IndentParser<unit, _> =
        fun stream ->
            stream.UserState <- { stream.UserState with Indent = newi }
            Reply(())

    /// `fail` with a message built by sprintf from fmt and one argument.
    let failf fmt = fail << sprintf fmt

    /// True when position pos satisfies indentation rule i (columns only).
    let acceptable i (pos: Position) =
        match i with
        // Any carries no data, so it is matched without a discard pattern.
        | Any -> true
        | Fail -> false
        | Greater bp -> bp.Column < pos.Column
        | Exact ep -> ep.Column = pos.Column
        | AtLeast ap -> ap.Column <= pos.Column
        | StartIndent _ -> true

    /// Column relation between rule i and rule o (presumably inner and outer);
    /// combinations that are not listed are treated as nestable.
    let nestableIn i o =
        match i, o with
        | Greater i, Greater o -> o.Column < i.Column
        | Greater i, Exact o -> o.Column < i.Column
        | Exact i, Exact o -> o.Column = i.Column
        | Exact i, Greater o -> o.Column <= i.Column
        | _, _ -> true

    /// Runs p only if the current position is acceptable for the stored
    /// indentation rule; otherwise fails with an "incorrect indentation" message.
    let tokeniser p =
        parse {
            let! pos = getPosition
            let! i = getIndentation

            if acceptable i pos
            then return! p
            else return! failf "incorrect indentation at %A" pos
        }

    /// Sets rule i, runs p, requires that no further acceptable token follows
    /// (labelled "unterminated ..."), then sets rule o and returns p's result.
    let nestP i o p =
        parse {
            do! putIndentation i
            let! x = p

            do! notFollowedBy (tokeniser anyChar)
                <?> (sprintf "unterminated %A" i)

            do! putIndentation o
            return x
        }

    /// Stores rule i, runs FParsec's `spaces`, then runs p through tokeniser.
    /// The rule is stored and `spaces` has run before the indentation check,
    /// so a failing check leaves those effects in place (not atomic).
    let indented<'a, 'u> i (p: Parser<'a, _>): IndentParser<_, 'u> =
        parse {
            do! putIndentation i
            do! spaces
            return! tokeniser p
        }

    /// p must start in exactly the column of pos.
    let exact<'a, 'u> pos p: IndentParser<'a, 'u> = indented (Exact pos) p

    /// p must start in a column strictly greater than that of pos.
    let greater<'a, 'u> pos p: IndentParser<'a, 'u> = indented (Greater pos) p

    /// p must start in the column of pos or a greater one.
    let atLeast<'a, 'u> pos p: IndentParser<'a, 'u> = indented (AtLeast pos) p

    /// p may start in any column; pos is accepted for symmetry but not used.
    let any<'a, 'u> pos p: IndentParser<'a, 'u> = indented Any p
我认为这里有两个问题:
- 您要求第一个 "Example" 的缩进大于它自身的起始位置,这是不可能满足的。无论当前位置如何,都应该让第一个解析器直接成功。
- greater 不是原子的,所以当它失败时,您的解析器会处于无效状态。这也许算、也许不算库中的缺陷;无论如何,您都可以用 attempt 把它变成原子的。
考虑到这一点,我认为以下解析器大致可以满足您的要求:
/// Parses one p with no indentation check, then zero or more `sep p` pairs in
/// which both sep and p are wrapped in `greater`, using the position where the
/// list starts as the reference. Returns the parsed elements as a list.
let indentSepBy p sep =
    parse {
        let! start = getPosition
        // The first element is parsed as-is.
        let! first = p
        // attempt is applied to each `greater` parser separately.
        let item = attempt (greater start p)
        let separator = attempt (greater start sep)
        let! rest = many (separator >>. item)
        return first :: rest
    }
您可以按如下方式进行测试:
// Parser under test: "Example" items separated by '.'.
let test = (pstring "Example", pchar '.') ||> indentSepBy
/// Prints a "***" separator line, then the result of running `test` followed
/// by eof on text.
let run text =
    printfn "***"
    let outcome = runParser (test .>> eof) () text
    printfn "%A" outcome
/// Entry point: runs the sample inputs in order and returns exit code 0.
[<EntryPoint>]
let main _ =
    [ "Example.Example" // success
      "Example\n.Example" // failure
      "Example\n .Example" ] // success
    |> List.iter run

    0
请注意,我用 eof 强制 test 解析器消耗整个输入。否则,即使它实际上没能解析完整个字符串,也会错误地报告成功。
借助 FParsec 和这个库(其代码非常短,已复制到问题末尾),我正在尝试设计一个类似于 sepBy 的解析器,它对作为参数传入的第一个解析器的缩进敏感。例如,给定 indentSepBy (pstring "Example") (pchar '.'),我希望下面这样的输入可以被接受:
Example
  .Example
  .Example.Example
      .Example.Example.Example
   .Example
但下面这样的输入不应被接受(最后一行没有比第一个 Example 缩进得更深):
Example
  .Example
 .Example.Example
.Example
也就是说,第一个元素(即第一个解析器)的起始位置决定了其余部分所需的缩进。为此,我尝试用缩进库中的组合器对 FParsec 自带的 sepBy 做一个简单的包装,得到了:
open FParsec
open IndentParsec
/// sepBy variant whose elements and separators are all wrapped in `greater`,
/// using the position at which the list starts as the reference position.
/// Note that the very first element is wrapped in `greater` as well.
let indentSepBy p sep =
    parse {
        let! start = getPosition
        let item = greater start p
        let separator = greater start sep
        return! sepBy item separator
    }
// Parser under test: "Example" items separated by '.'.
let test = (pstring "Example", pchar '.') ||> indentSepBy

// Simple for start
let text = "Example.Example"
应用它,我收到以下错误消息(来自 FParsec):
"Error in Ln: 1 Col: 16
Example.Example
               ^
Note: The error occurred at the end of the input stream.
Expecting: '.'
"
如果我删除与缩进相关的解析器,
/// Variant with the indentation combinators removed, so it is just trivially
/// equivalent to the sepBy parser.
let indentSepBy p sep =
    parse {
        // The position is still read, but it is no longer used.
        let! _ = getPosition
        return! sepBy p sep
    }
问题不再出现,结果如我们所料。因此,我不明白是哪个参数导致了这个错误。这很可能是缩进库中的问题,但我无法弄清楚……下面是所涉及的库,我已将其精简到只剩要点:
open FParsec
/// Indentation-aware layer over FParsec: the active indentation rule is stored
/// in the parser's user state and checked before a token is parsed.
module IndentParser =
    /// Rule that the column of the next token is checked against.
    type Indentation =
        | Fail
        | Any
        | Greater of Position
        | Exact of Position
        | AtLeast of Position
        | StartIndent of Position

        /// Reference position carried by the rule; None for Any and Fail.
        member this.Position =
            match this with
            | Any
            | Fail -> None
            | Greater p -> Some p
            | Exact p -> Some p
            | AtLeast p -> Some p
            | StartIndent p -> Some p

    /// User state: the active indentation rule plus the caller's own state.
    type IndentState<'T> = { Indent: Indentation; UserState: 'T }

    /// An FParsec parser whose user state is an IndentState.
    type IndentParser<'T, 'UserState> = Parser<'T, IndentState<'UserState>>

    /// Initial state: no indentation constraint (Any) around user state u.
    let indentState u = { Indent = Any; UserState = u }

    /// Runs p on string s (with an empty stream name), starting from indentState u.
    let runParser p u s = runParserOnString p (indentState u) "" s

    /// Runs p on the file at path, read as UTF-8, starting from indentState u.
    /// The right-hand side refers to FParsec's runParserOnFile (non-recursive let).
    let runParserOnFile p u path =
        runParserOnFile p (indentState u) path System.Text.Encoding.UTF8

    /// Returns the indentation rule currently stored in the user state.
    let getIndentation: IndentParser<_, _> =
        fun stream ->
            match stream.UserState with
            | { Indent = i } -> Reply i

    /// Replaces the stored indentation rule with newi; the rest of the user
    /// state is kept.
    let putIndentation newi: IndentParser<unit, _> =
        fun stream ->
            stream.UserState <- { stream.UserState with Indent = newi }
            Reply(())

    /// `fail` with a message built by sprintf from fmt and one argument.
    let failf fmt = fail << sprintf fmt

    /// True when position pos satisfies indentation rule i (columns only).
    let acceptable i (pos: Position) =
        match i with
        // Any carries no data, so it is matched without a discard pattern.
        | Any -> true
        | Fail -> false
        | Greater bp -> bp.Column < pos.Column
        | Exact ep -> ep.Column = pos.Column
        | AtLeast ap -> ap.Column <= pos.Column
        | StartIndent _ -> true

    /// Column relation between rule i and rule o (presumably inner and outer);
    /// combinations that are not listed are treated as nestable.
    let nestableIn i o =
        match i, o with
        | Greater i, Greater o -> o.Column < i.Column
        | Greater i, Exact o -> o.Column < i.Column
        | Exact i, Exact o -> o.Column = i.Column
        | Exact i, Greater o -> o.Column <= i.Column
        | _, _ -> true

    /// Runs p only if the current position is acceptable for the stored
    /// indentation rule; otherwise fails with an "incorrect indentation" message.
    let tokeniser p =
        parse {
            let! pos = getPosition
            let! i = getIndentation

            if acceptable i pos
            then return! p
            else return! failf "incorrect indentation at %A" pos
        }

    /// Sets rule i, runs p, requires that no further acceptable token follows
    /// (labelled "unterminated ..."), then sets rule o and returns p's result.
    let nestP i o p =
        parse {
            do! putIndentation i
            let! x = p

            do! notFollowedBy (tokeniser anyChar)
                <?> (sprintf "unterminated %A" i)

            do! putIndentation o
            return x
        }

    /// Stores rule i, runs FParsec's `spaces`, then runs p through tokeniser.
    /// The rule is stored and `spaces` has run before the indentation check,
    /// so a failing check leaves those effects in place (not atomic).
    let indented<'a, 'u> i (p: Parser<'a, _>): IndentParser<_, 'u> =
        parse {
            do! putIndentation i
            do! spaces
            return! tokeniser p
        }

    /// p must start in exactly the column of pos.
    let exact<'a, 'u> pos p: IndentParser<'a, 'u> = indented (Exact pos) p

    /// p must start in a column strictly greater than that of pos.
    let greater<'a, 'u> pos p: IndentParser<'a, 'u> = indented (Greater pos) p

    /// p must start in the column of pos or a greater one.
    let atLeast<'a, 'u> pos p: IndentParser<'a, 'u> = indented (AtLeast pos) p

    /// p may start in any column; pos is accepted for symmetry but not used.
    let any<'a, 'u> pos p: IndentParser<'a, 'u> = indented Any p
我认为这里有两个问题:
- 您要求第一个 "Example" 的缩进大于它自身的起始位置,这是不可能满足的。无论当前位置如何,都应该让第一个解析器直接成功。
- greater 不是原子的,所以当它失败时,您的解析器会处于无效状态。这也许算、也许不算库中的缺陷;无论如何,您都可以用 attempt 把它变成原子的。
考虑到这一点,我认为以下解析器大致可以满足您的要求:
/// Parses one p with no indentation check, then zero or more `sep p` pairs in
/// which both sep and p are wrapped in `greater`, using the position where the
/// list starts as the reference. Returns the parsed elements as a list.
let indentSepBy p sep =
    parse {
        let! start = getPosition
        // The first element is parsed as-is.
        let! first = p
        // attempt is applied to each `greater` parser separately.
        let item = attempt (greater start p)
        let separator = attempt (greater start sep)
        let! rest = many (separator >>. item)
        return first :: rest
    }
您可以按如下方式进行测试:
// Parser under test: "Example" items separated by '.'.
let test = (pstring "Example", pchar '.') ||> indentSepBy
/// Prints a "***" separator line, then the result of running `test` followed
/// by eof on text.
let run text =
    printfn "***"
    let outcome = runParser (test .>> eof) () text
    printfn "%A" outcome
/// Entry point: runs the sample inputs in order and returns exit code 0.
[<EntryPoint>]
let main _ =
    [ "Example.Example" // success
      "Example\n.Example" // failure
      "Example\n .Example" ] // success
    |> List.iter run

    0
请注意,我用 eof 强制 test 解析器消耗整个输入。否则,即使它实际上没能解析完整个字符串,也会错误地报告成功。