如何使用 Rust 的 nom 库为这种结构化文本编写解析器?
How to use Rust nom to write a parser for this kind of structured text?
我有以下数据
let data = r#"title1
title1 line1
title1 line2
sep/
title2
title2 line1
title2 line2
title2 line3
sep/
title3
title3 line1
sep/"#;
基本上代表三个条目:
/// One record of the input: a title line plus the body lines that follow it.
struct Entry {
/// Text of the entry's first line, without the line ending.
title: String,
/// Every line after the title, up to (but not including) the `sep/` separator line.
body: String,
}
每个条目都有一个标题和一个正文(body)。标题占一行(不含行尾换行符),正文占其后的所有行,直到遇到分隔行(`sep/`)为止。
我想要的结果是一个由条目组成的向量(`Vec<Entry>`)。该如何用 nom 来解析它?我是 nom 的新手,没法把这些部分组合起来形成一个能工作的解析器。以下是我目前写的代码:
use nom::IResult;
use nom::branch::alt;
use nom::bytes::complete::{tag, take_until, is_not, is_a};
use nom::error::ErrorKind::ParseTo;
use nom::sequence::{pair, tuple, delimited, terminated};
use nom::combinator::opt;
use nom::error::{Error, ErrorKind};
use nom::character::complete::line_ending;
use nom::regexp::str::{re_find, re_match, re_matches, re_capture};
use nom::multi::many0;
/// One record of the input: a title line plus the body lines that follow it.
struct Entry {
/// Text of the entry's first line, without the line ending.
title: String,
/// Every line after the title, up to (but not including) the `sep/` separator line.
body: String,
}
fn get_entry_title(i: &str) -> IResult<&str, &str> {
delimited(tag(""),
take_until(alt((
tag("\r\n"),
tag("\n")
))),
alt((
tag("\r\n"),
tag("\n")
))
)(i)
}
fn get_entry_body(i: &str) -> IResult<&str, &str> {
terminated(
take_until( tag("sep/")),
tag("sep/")
)(i)
}
/// Runs the title parser once against the embedded sample text.
///
/// The outcome is only bound to a local; nothing is printed.
fn main() {
    let sample = r#"title1
title1 line1
title1 line2
sep/
title2
title2 line1
title2 line2
title2 line3
sep/
title3
title3 line1
sep/"#;
    let result = get_entry_title(sample);
}
下面是一个只使用 nom 的解法(基于 nom 6.1.2):
use nom::IResult;
use nom::branch::alt;
use nom::bytes::complete::{tag, take_while};
use nom::sequence::{terminated, delimited, pair};
use nom::multi::{separated_list0, many1};
/// One parsed record: a title and the body text that belongs to it.
///
/// `Clone`, `PartialEq` and `Eq` are derived in addition to `Debug` so parsed
/// entries can be duplicated and compared directly (e.g. with `assert_eq!`).
#[derive(Debug, Clone, PartialEq, Eq)]
struct Entry {
    /// Text of the entry's title line, without the line ending.
    title: String,
    /// Text collected from the entry's body lines.
    body: String,
}
/// Demo entry point: parses the embedded sample and pretty-prints the entries.
///
/// # Panics
/// Panics if `parse` returns an error for the sample text.
fn main() {
    let sample = r#"title1
title1 line1
title1 line2
sep/
title2
title2 line1
title2 line2
title2 line3
sep/
title3
title3 line1
sep/"#;
    // Only the parsed entries are of interest; the unparsed remainder is dropped.
    let entries = parse(sample).unwrap().1;
    println!("{:#?}", entries);
}
/// Parses a sequence of entries with a `sep/` line between consecutive entries.
///
/// Returns the unparsed remainder together with the collected entries. The
/// separator is only treated as something that sits *between* two entries, so
/// a separator that is not followed by another entry stays in the remainder.
fn parse(input: &str) -> IResult<&str, Vec<Entry>> {
    let mut entries = separated_list0(separator, entry);
    entries(input)
}
/// Parses one entry: a title line followed by one or more body lines.
///
/// A body line is a line that starts with the entry's title followed by a
/// single space (see `body_line`); that prefix is stripped from each line.
/// The remaining line texts are joined with `\n` so line boundaries are kept
/// in `Entry::body`.
///
/// The `sep/` line after the entry is NOT consumed here.
///
/// # Errors
/// Returns the error of `title` when no title line can be read, and the error
/// of `many1` when the title is not followed by at least one body line.
fn entry(input: &str) -> IResult<&str, Entry> {
    // A distinct local name avoids shadowing the `title` parser function.
    let (rest, heading) = title(input)?;
    let (rest, body_lines) = many1(body_line(heading))(rest)?;
    let entry = Entry {
        title: heading.to_owned(),
        // Joining with "" would glue adjacent lines together ("line1line2"),
        // making the original line structure unrecoverable.
        body: body_lines.join("\n"),
    };
    // TODO: Does it have to end with a separator?
    // If it does, either use terminated() in combination with many(), or add
    // an additional check for separator here.
    Ok((rest, entry))
}
fn title(input: &str) -> IResult<&str, &str> {
terminated(
take_while(not_r_n),
end_of_line,
)(input)
}
pub fn body_line<'i>(title: &'i str) -> impl FnMut(&'i str) -> IResult<&'i str, &'i str, nom::error::Error<&'i str>>
{
move |input: &str| {
delimited(
pair(tag(title), tag(" ")),
take_while(not_r_n),
end_of_line,
)(input)
}
}
fn separator(input: &str) -> IResult<&str, &str> {
terminated(
tag("sep/"), // the separator is hardcoded, otherwise you have to do the same monstrosity as body_line() above
end_of_line,
)(input)
}
/// Parses a single line ending, either `\n` or `\r\n`, and returns it.
fn end_of_line(input: &str) -> IResult<&str, &str> {
    let mut newline = alt((tag("\n"), tag("\r\n")));
    newline(input)
}
/// Returns `true` for every character except carriage return and line feed.
fn not_r_n(ch: char) -> bool {
    !matches!(ch, '\r' | '\n')
}
我有以下数据
let data = r#"title1
title1 line1
title1 line2
sep/
title2
title2 line1
title2 line2
title2 line3
sep/
title3
title3 line1
sep/"#;
基本上代表三个条目:
/// One record of the input: a title line plus the body lines that follow it.
struct Entry {
/// Text of the entry's first line, without the line ending.
title: String,
/// Every line after the title, up to (but not including) the `sep/` separator line.
body: String,
}
每个条目都有一个标题和一个正文(body)。标题占一行(不含行尾换行符),正文占其后的所有行,直到遇到分隔行(`sep/`)为止。
我想要的结果是一个由条目组成的向量(`Vec<Entry>`)。该如何用 nom 来解析它?我是 nom 的新手,没法把这些部分组合起来形成一个能工作的解析器。以下是我目前写的代码:
use nom::IResult;
use nom::branch::alt;
use nom::bytes::complete::{tag, take_until, is_not, is_a};
use nom::error::ErrorKind::ParseTo;
use nom::sequence::{pair, tuple, delimited, terminated};
use nom::combinator::opt;
use nom::error::{Error, ErrorKind};
use nom::character::complete::line_ending;
use nom::regexp::str::{re_find, re_match, re_matches, re_capture};
use nom::multi::many0;
/// One record of the input: a title line plus the body lines that follow it.
struct Entry {
/// Text of the entry's first line, without the line ending.
title: String,
/// Every line after the title, up to (but not including) the `sep/` separator line.
body: String,
}
fn get_entry_title(i: &str) -> IResult<&str, &str> {
delimited(tag(""),
take_until(alt((
tag("\r\n"),
tag("\n")
))),
alt((
tag("\r\n"),
tag("\n")
))
)(i)
}
fn get_entry_body(i: &str) -> IResult<&str, &str> {
terminated(
take_until( tag("sep/")),
tag("sep/")
)(i)
}
/// Runs the title parser once against the embedded sample text.
///
/// The outcome is only bound to a local; nothing is printed.
fn main() {
    let sample = r#"title1
title1 line1
title1 line2
sep/
title2
title2 line1
title2 line2
title2 line3
sep/
title3
title3 line1
sep/"#;
    let result = get_entry_title(sample);
}
下面是一个只使用 nom 的解法(基于 nom 6.1.2):
use nom::IResult;
use nom::branch::alt;
use nom::bytes::complete::{tag, take_while};
use nom::sequence::{terminated, delimited, pair};
use nom::multi::{separated_list0, many1};
/// One parsed record: a title and the body text that belongs to it.
///
/// `Clone`, `PartialEq` and `Eq` are derived in addition to `Debug` so parsed
/// entries can be duplicated and compared directly (e.g. with `assert_eq!`).
#[derive(Debug, Clone, PartialEq, Eq)]
struct Entry {
    /// Text of the entry's title line, without the line ending.
    title: String,
    /// Text collected from the entry's body lines.
    body: String,
}
/// Demo entry point: parses the embedded sample and pretty-prints the entries.
///
/// # Panics
/// Panics if `parse` returns an error for the sample text.
fn main() {
    let sample = r#"title1
title1 line1
title1 line2
sep/
title2
title2 line1
title2 line2
title2 line3
sep/
title3
title3 line1
sep/"#;
    // Only the parsed entries are of interest; the unparsed remainder is dropped.
    let entries = parse(sample).unwrap().1;
    println!("{:#?}", entries);
}
/// Parses a sequence of entries with a `sep/` line between consecutive entries.
///
/// Returns the unparsed remainder together with the collected entries. The
/// separator is only treated as something that sits *between* two entries, so
/// a separator that is not followed by another entry stays in the remainder.
fn parse(input: &str) -> IResult<&str, Vec<Entry>> {
    let mut entries = separated_list0(separator, entry);
    entries(input)
}
/// Parses one entry: a title line followed by one or more body lines.
///
/// A body line is a line that starts with the entry's title followed by a
/// single space (see `body_line`); that prefix is stripped from each line.
/// The remaining line texts are joined with `\n` so line boundaries are kept
/// in `Entry::body`.
///
/// The `sep/` line after the entry is NOT consumed here.
///
/// # Errors
/// Returns the error of `title` when no title line can be read, and the error
/// of `many1` when the title is not followed by at least one body line.
fn entry(input: &str) -> IResult<&str, Entry> {
    // A distinct local name avoids shadowing the `title` parser function.
    let (rest, heading) = title(input)?;
    let (rest, body_lines) = many1(body_line(heading))(rest)?;
    let entry = Entry {
        title: heading.to_owned(),
        // Joining with "" would glue adjacent lines together ("line1line2"),
        // making the original line structure unrecoverable.
        body: body_lines.join("\n"),
    };
    // TODO: Does it have to end with a separator?
    // If it does, either use terminated() in combination with many(), or add
    // an additional check for separator here.
    Ok((rest, entry))
}
fn title(input: &str) -> IResult<&str, &str> {
terminated(
take_while(not_r_n),
end_of_line,
)(input)
}
pub fn body_line<'i>(title: &'i str) -> impl FnMut(&'i str) -> IResult<&'i str, &'i str, nom::error::Error<&'i str>>
{
move |input: &str| {
delimited(
pair(tag(title), tag(" ")),
take_while(not_r_n),
end_of_line,
)(input)
}
}
fn separator(input: &str) -> IResult<&str, &str> {
terminated(
tag("sep/"), // the separator is hardcoded, otherwise you have to do the same monstrosity as body_line() above
end_of_line,
)(input)
}
/// Parses a single line ending, either `\n` or `\r\n`, and returns it.
fn end_of_line(input: &str) -> IResult<&str, &str> {
    let mut newline = alt((tag("\n"), tag("\r\n")));
    newline(input)
}
/// Returns `true` for every character except carriage return and line feed.
fn not_r_n(ch: char) -> bool {
    !matches!(ch, '\r' | '\n')
}