使用 FParsec 解析自描述输入
Use FParsec to parse a self-describing input
我正在使用 FParsec 来解析描述其自身格式的输入。例如,考虑这个输入:
int,str,int:4,'hello',3
输入的第一部分(冒号之前)描述了输入的第二部分的格式。在这种情况下,格式为 int
、str
、int
,这意味着实际数据由给定类型的三个逗号分隔值组成,因此结果应为 4
, "hello"
, 3
.
使用 FParsec 解析此类内容的最佳方法是什么?
我已将我的最大努力粘贴在下方,但我对此并不满意。有没有更好的方法来做到这一点,它更干净、状态更少、对 parse
monad 的依赖更少?我认为这取决于对 UserState 的更智能管理,但我不知道该怎么做。谢谢。
open FParsec
type State = { Formats : string[]; Index : int32 }
with static member Default = { Formats = [||]; Index = 0 }
type Value =
| Integer of int
| String of string
let parseFormat : Parser<_, State> =
parse {
let! formats =
sepBy
(pstring "int" <|> pstring "str")
(skipString ",")
|>> Array.ofList
do! updateUserState (fun state -> { state with Formats = formats })
}
let parseValue format =
match format with
| "int" -> pint32 |>> Integer
| "str" ->
between
(skipString "'")
(skipString "'")
(manySatisfy (fun c -> c <> '\''))
|>> String
| _ -> failwith "Unexpected"
let parseValueByState =
parse {
let! state = getUserState
let format = state.Formats.[state.Index]
do! setUserState { state with Index = state.Index + 1}
return! parseValue format
}
let parseData =
sepBy
parseValueByState
(skipString ",")
let parse =
parseFormat
>>. skipString ":"
>>. parseData
[<EntryPoint>]
let main argv =
let result = runParserOnString parse State.Default "" "int,str,int:4,'hello',3"
printfn "%A" result
0
原来的代码似乎有几个问题,所以我冒昧地从头开始重写。
首先,几个可能在其他 FParsec 相关项目中有用的库函数:
/// Simple Map
/// usage: let z = Map ["hello" => 1; "bye" => 2]
let (=>) x y = x,y
let makeMap x = new Map<_,_>(x)
/// A handy construct allowing NOT to write lengthy type definitions
/// and also avoid Value Restriction error
type Parser<'t> = Parser<'t, UserState>
/// A list combinator, inspired by FParsec's (>>=) combinator
let (<<+) (p1: Parser<'T list>) (p2: Parser<'T>) =
p1 >>= fun x -> p2 >>= fun y -> preturn (y::x)
/// Runs all parsers listed in the source list;
/// All but the trailing one are also combined with a separator
let allOfSepBy separator parsers : Parser<'T list> =
let rec fold state =
function
| [] -> pzero
| hd::[] -> state <<+ hd
| hd::tl -> fold (state <<+ (hd .>> separator)) tl
fold (preturn []) parsers
|>> List.rev // reverse the list since we appended to the top
现在,主要代码。基本思路是 运行 分三步解析:
- 解析出密钥(纯 ASCII 字符串)
- 将这些键映射到实际的值解析器
- 运行 这些解析器按顺序
其余部分似乎在代码中进行了注释。 :)
/// The resulting type
type Output =
| Integer of int
| String of string
/// tag to parser mappings
let mappings =
[
"int" => (pint32 |>> Integer)
"str" => (
manySatisfy (fun c -> c <> '\'')
|> between (skipChar ''') (skipChar ''')
|>> String
)
]
|> makeMap
let myProcess : Parser<Output list> =
let pKeys = // First, we parse out the keys
many1Satisfy isAsciiLower // Parse one key; keys are always ASCII strings
|> sepBy <| (skipChar ',') // many keys separated by comma
.>> (skipChar ':') // all this with trailing semicolon
let pValues = fun keys ->
keys // take the keys list
|> List.map // find the required Value parser
// (NO ERROR CHECK for bad keys)
(fun p -> Map.find p mappings)
|> allOfSepBy (skipChar ',') // they must run in order, comma-separated
pKeys >>= pValues
运行 字符串:int,int,str,int,str:4,42,'hello',3,'foobar'
返回:[Integer 4; Integer 42; String "hello"; Integer 3; String "foobar"]
@bytebuster 打败了我,但我仍然 post 我的解决方案。该技术类似于@bytebuster。
感谢您提出有趣的问题。
在编译器中,我认为首选技术是将文本解析为 AST,然后 运行 进行类型检查。对于这个例子,一个可能更简单的技术是解析类型定义 returns 一组值的解析器。然后将这些解析器应用于字符串的其余部分。
open FParsec
type Value =
| Integer of int
| String of string
type ValueParser = Parser<Value, unit>
let parseIntValue : Parser<Value, unit> =
pint32 |>> Integer
let parseStringValue : Parser<Value, unit> =
between
(skipChar '\'')
(skipChar '\'')
(manySatisfy (fun c -> c <> '\''))
<?> "string"
|>> String
let parseValueParser : Parser<ValueParser, unit> =
choice
[
skipString "int" >>% parseIntValue
skipString "str" >>% parseStringValue
]
let parseValueParsers : Parser<ValueParser list, unit> =
sepBy1
parseValueParser
(skipChar ',')
// Runs a list of parsers 'ps' separated by 'sep' parser
let sepByList (ps : Parser<'T, unit> list) (sep : Parser<unit, unit>) : Parser<'T list, unit> =
let rec loop adjust ps =
match ps with
| [] -> preturn []
| h::t ->
adjust h >>= fun v -> loop (fun pp -> sep >>. pp) t >>= fun vs -> preturn (v::vs)
loop id ps
let parseLine : Parser<Value list, unit> =
parseValueParsers .>> skipChar ':' >>= (fun vps -> sepByList vps (skipChar ',')) .>> eof
[<EntryPoint>]
let main argv =
let s = "int,str,int:4,'hello',3"
let r = run parseLine s
printfn "%A" r
0
解析 int,str,int:4,'hello',3
产生 Success: [Integer 4; String "hello";Integer 3]
。
解析 int,str,str:4,'hello',3
(不正确)产生:
Failure:
Error in Ln: 1 Col: 23
int,str,str:4,'hello',3
^
Expecting: string
我重写了@FuleSnabel 的 sepByList 如下,以帮助我更好地理解它。这看起来对吗?
let sepByList (parsers : Parser<'T, unit> list) (sep : Parser<unit, unit>) : Parser<'T list, unit> =
let rec loop adjust parsers =
parse {
match parsers with
| [] -> return []
| parser :: tail ->
let! value = adjust parser
let! values = loop (fun parser -> sep >>. parser) tail
return value :: values
}
loop id parsers
我正在使用 FParsec 来解析描述其自身格式的输入。例如,考虑这个输入:
int,str,int:4,'hello',3
输入的第一部分(冒号之前)描述了输入的第二部分的格式。在这种情况下,格式为 int
、str
、int
,这意味着实际数据由给定类型的三个逗号分隔值组成,因此结果应为 4
, "hello"
, 3
.
使用 FParsec 解析此类内容的最佳方法是什么?
我已将我的最大努力粘贴在下方,但我对此并不满意。有没有更好的方法来做到这一点,它更干净、状态更少、对 parse
monad 的依赖更少?我认为这取决于对 UserState 的更智能管理,但我不知道该怎么做。谢谢。
open FParsec
type State = { Formats : string[]; Index : int32 }
with static member Default = { Formats = [||]; Index = 0 }
type Value =
| Integer of int
| String of string
let parseFormat : Parser<_, State> =
parse {
let! formats =
sepBy
(pstring "int" <|> pstring "str")
(skipString ",")
|>> Array.ofList
do! updateUserState (fun state -> { state with Formats = formats })
}
let parseValue format =
match format with
| "int" -> pint32 |>> Integer
| "str" ->
between
(skipString "'")
(skipString "'")
(manySatisfy (fun c -> c <> '\''))
|>> String
| _ -> failwith "Unexpected"
let parseValueByState =
parse {
let! state = getUserState
let format = state.Formats.[state.Index]
do! setUserState { state with Index = state.Index + 1}
return! parseValue format
}
let parseData =
sepBy
parseValueByState
(skipString ",")
let parse =
parseFormat
>>. skipString ":"
>>. parseData
[<EntryPoint>]
let main argv =
let result = runParserOnString parse State.Default "" "int,str,int:4,'hello',3"
printfn "%A" result
0
原来的代码似乎有几个问题,所以我冒昧地从头开始重写。
首先,几个可能在其他 FParsec 相关项目中有用的库函数:
/// Simple Map
/// usage: let z = Map ["hello" => 1; "bye" => 2]
let (=>) x y = x,y
let makeMap x = new Map<_,_>(x)
/// A handy construct allowing NOT to write lengthy type definitions
/// and also avoid Value Restriction error
type Parser<'t> = Parser<'t, UserState>
/// A list combinator, inspired by FParsec's (>>=) combinator
let (<<+) (p1: Parser<'T list>) (p2: Parser<'T>) =
p1 >>= fun x -> p2 >>= fun y -> preturn (y::x)
/// Runs all parsers listed in the source list;
/// All but the trailing one are also combined with a separator
let allOfSepBy separator parsers : Parser<'T list> =
let rec fold state =
function
| [] -> pzero
| hd::[] -> state <<+ hd
| hd::tl -> fold (state <<+ (hd .>> separator)) tl
fold (preturn []) parsers
|>> List.rev // reverse the list since we appended to the top
现在,主要代码。基本思路是 运行 分三步解析:
- 解析出密钥(纯 ASCII 字符串)
- 将这些键映射到实际的值解析器
- 运行 这些解析器按顺序
其余部分似乎在代码中进行了注释。 :)
/// The resulting type
type Output =
| Integer of int
| String of string
/// tag to parser mappings
let mappings =
[
"int" => (pint32 |>> Integer)
"str" => (
manySatisfy (fun c -> c <> '\'')
|> between (skipChar ''') (skipChar ''')
|>> String
)
]
|> makeMap
let myProcess : Parser<Output list> =
let pKeys = // First, we parse out the keys
many1Satisfy isAsciiLower // Parse one key; keys are always ASCII strings
|> sepBy <| (skipChar ',') // many keys separated by comma
.>> (skipChar ':') // all this with trailing semicolon
let pValues = fun keys ->
keys // take the keys list
|> List.map // find the required Value parser
// (NO ERROR CHECK for bad keys)
(fun p -> Map.find p mappings)
|> allOfSepBy (skipChar ',') // they must run in order, comma-separated
pKeys >>= pValues
运行 字符串:int,int,str,int,str:4,42,'hello',3,'foobar'
返回:[Integer 4; Integer 42; String "hello"; Integer 3; String "foobar"]
@bytebuster 打败了我,但我仍然 post 我的解决方案。该技术类似于@bytebuster。
感谢您提出有趣的问题。
在编译器中,我认为首选技术是将文本解析为 AST,然后 运行 进行类型检查。对于这个例子,一个可能更简单的技术是解析类型定义 returns 一组值的解析器。然后将这些解析器应用于字符串的其余部分。
open FParsec
type Value =
| Integer of int
| String of string
type ValueParser = Parser<Value, unit>
let parseIntValue : Parser<Value, unit> =
pint32 |>> Integer
let parseStringValue : Parser<Value, unit> =
between
(skipChar '\'')
(skipChar '\'')
(manySatisfy (fun c -> c <> '\''))
<?> "string"
|>> String
let parseValueParser : Parser<ValueParser, unit> =
choice
[
skipString "int" >>% parseIntValue
skipString "str" >>% parseStringValue
]
let parseValueParsers : Parser<ValueParser list, unit> =
sepBy1
parseValueParser
(skipChar ',')
// Runs a list of parsers 'ps' separated by 'sep' parser
let sepByList (ps : Parser<'T, unit> list) (sep : Parser<unit, unit>) : Parser<'T list, unit> =
let rec loop adjust ps =
match ps with
| [] -> preturn []
| h::t ->
adjust h >>= fun v -> loop (fun pp -> sep >>. pp) t >>= fun vs -> preturn (v::vs)
loop id ps
let parseLine : Parser<Value list, unit> =
parseValueParsers .>> skipChar ':' >>= (fun vps -> sepByList vps (skipChar ',')) .>> eof
[<EntryPoint>]
let main argv =
let s = "int,str,int:4,'hello',3"
let r = run parseLine s
printfn "%A" r
0
解析 int,str,int:4,'hello',3
产生 Success: [Integer 4; String "hello";Integer 3]
。
解析 int,str,str:4,'hello',3
(不正确)产生:
Failure:
Error in Ln: 1 Col: 23
int,str,str:4,'hello',3
^
Expecting: string
我重写了@FuleSnabel 的 sepByList 如下,以帮助我更好地理解它。这看起来对吗?
let sepByList (parsers : Parser<'T, unit> list) (sep : Parser<unit, unit>) : Parser<'T list, unit> =
let rec loop adjust parsers =
parse {
match parsers with
| [] -> return []
| parser :: tail ->
let! value = adjust parser
let! values = loop (fun parser -> sep >>. parser) tail
return value :: values
}
loop id parsers