fparsec rfc2822 解析多个 header 行
fparsec rfc2822 parsing multiple header lines
我正在尝试使用 fparsec 解析 RFC 2822,但我无法处理多行的 header:(它与下一个 header 混淆):
这是我最好的尝试:你有什么建议吗?
/// Parses the literal string `s`; a short alias for `pstring`.
let str (s: string) = s |> pstring
/// Parses a (possibly empty) run of characters, stopping before the first
/// ':', '\r' or '\n'.
let stringLiteral =
    // The body must be indented relative to `let` (F# offside rule).
    manySatisfy (fun c -> c <> ':' && c <> '\r' && c <> '\n')
/// Parses zero or more ' ' characters (only spaces; '\t' is not consumed).
let ws = pchar ' ' |> many
/// Parses a single-line `key: value` header terminated by '\n' and
/// returns the (key, value) pair; the newline itself is discarded.
let keyValueSimple =
    stringLiteral .>>. (ws >>. str ":" >>. ws >>. stringLiteral) .>> pchar '\n'
/// Parses one non-final line of a folded value: optional leading spaces,
/// the text, then '\n' immediately followed by '\t'. Returns the text.
let lineValue =
    ws >>. stringLiteral .>> pchar '\n' .>> pchar '\t'
/// Parses the final line of a value: optional leading spaces, the text,
/// then '\n' that is NOT followed by '\t'. Returns the text.
let lastValue =
    ws >>. stringLiteral .>> pchar '\n' .>> notFollowedBy (pchar '\t')
/// Parses a header name up to ':', then zero or more `lineValue` lines,
/// then a `lastValue` line.
/// The result keeps only the (name, ':') prefix, returned twice; the parsed
/// value lines are discarded. `many lineValue` is applied without `attempt`.
let keyValueComplex =
    pipe3
        (stringLiteral .>>. (ws >>. pchar ':'))
        (many lineValue)
        lastValue
        (fun prefix _ _ -> (prefix, prefix))
/// Parses one or more consecutive headers with `keyValueComplex`.
let headers = many1 keyValueComplex
/// Runs the `headers` parser on a fixed two-header sample via `test`.
/// NOTE(review): `fileName` is not used by the body; it is kept so the
/// signature stays unchanged.
let parse (fileName:string) =
    // The body must be indented relative to `let` (F# offside rule).
    test headers "Return-Path: <ewrwe@werw.com>\n\twerwe\nDelivered-To: adfasdf@aasdfas.afa.com\n "
我得到的错误是:在 3,1 处期望 \t:Delivered-To: adfasdf@aasdfas.afa.com
没关系,看起来我需要回溯(使用 attempt),这样解析器就不会总是期待一个 \t,而是去寻找下一个 header。
/// Parses a header name up to ':', then zero or more continuation lines and
/// the final line, returning (name, concatenated value text).
/// `attempt` lets `lineValue` backtrack when '\n' is not followed by '\t',
/// so `lastValue` can then match that final line.
let keyValueComplex = stringLiteral .>>. (ws >>. pchar ':') .>>. (many (attempt lineValue)) .>>. lastValue |>> (fun (((g, _), d), b) -> (g, (Seq.fold (+) "" d) + b))
这现在产生:
Success: [("Return-Path", "werwe");
("Delivered-To", "adfasdf@aasdfas.afa.com")]
我正在尝试使用 fparsec 解析 RFC 2822,但我无法处理多行的 header:(它与下一个 header 混淆): 这是我最好的尝试:你有什么建议吗?
/// Parses the literal string `s`; a short alias for `pstring`.
let str (s: string) = s |> pstring
/// Parses a (possibly empty) run of characters, stopping before the first
/// ':', '\r' or '\n'.
let stringLiteral =
    // The body must be indented relative to `let` (F# offside rule).
    manySatisfy (fun c -> c <> ':' && c <> '\r' && c <> '\n')
/// Parses zero or more ' ' characters (only spaces; '\t' is not consumed).
let ws = pchar ' ' |> many
/// Parses a single-line `key: value` header terminated by '\n' and
/// returns the (key, value) pair; the newline itself is discarded.
let keyValueSimple =
    stringLiteral .>>. (ws >>. str ":" >>. ws >>. stringLiteral) .>> pchar '\n'
/// Parses one non-final line of a folded value: optional leading spaces,
/// the text, then '\n' immediately followed by '\t'. Returns the text.
let lineValue =
    ws >>. stringLiteral .>> pchar '\n' .>> pchar '\t'
/// Parses the final line of a value: optional leading spaces, the text,
/// then '\n' that is NOT followed by '\t'. Returns the text.
let lastValue =
    ws >>. stringLiteral .>> pchar '\n' .>> notFollowedBy (pchar '\t')
/// Parses a header name up to ':', then zero or more `lineValue` lines,
/// then a `lastValue` line.
/// The result keeps only the (name, ':') prefix, returned twice; the parsed
/// value lines are discarded. `many lineValue` is applied without `attempt`.
let keyValueComplex =
    pipe3
        (stringLiteral .>>. (ws >>. pchar ':'))
        (many lineValue)
        lastValue
        (fun prefix _ _ -> (prefix, prefix))
/// Parses one or more consecutive headers with `keyValueComplex`.
let headers = many1 keyValueComplex
/// Runs the `headers` parser on a fixed two-header sample via `test`.
/// NOTE(review): `fileName` is not used by the body; it is kept so the
/// signature stays unchanged.
let parse (fileName:string) =
    // The body must be indented relative to `let` (F# offside rule).
    test headers "Return-Path: <ewrwe@werw.com>\n\twerwe\nDelivered-To: adfasdf@aasdfas.afa.com\n "
我得到的错误是:在 3,1 处期望 \t:Delivered-To: adfasdf@aasdfas.afa.com
没关系,看起来我需要回溯(使用 attempt),这样解析器就不会总是期待一个 \t,而是去寻找下一个 header。
/// Parses a header name up to ':', then zero or more continuation lines and
/// the final line, returning (name, concatenated value text).
/// `attempt` lets `lineValue` backtrack when '\n' is not followed by '\t',
/// so `lastValue` can then match that final line.
let keyValueComplex = stringLiteral .>>. (ws >>. pchar ':') .>>. (many (attempt lineValue)) .>>. lastValue |>> (fun (((g, _), d), b) -> (g, (Seq.fold (+) "" d) + b))
这现在产生:
成功:[("Return-Path", "