关于使用 Rust 的 nom crate 解析 HTTP header 的一个奇怪问题
A weird problem when parsing HTTP headers with the nom crate in Rust
我写了一些使用 nom 解析 HTTP headers 的代码,但得到了一些奇怪的结果,如下面的示例所示。
在 main 函数中,字符串 input1 只比 input2 在末尾多了一个分号,却得到了完全不同的结果。显然,我希望 text(input1) 得到与 text(input2) 类似的结果,感谢您帮助我检查发生了什么。
[dependencies]
nom = "6"
use nom::{
branch::alt,
bytes::complete::{tag, tag_no_case, take_until},
character::complete::multispace0,
combinator::opt,
multi::separated_list0,
sequence::tuple,
};
use nom::{AsChar, IResult, InputTakeAtPosition};
use std::collections::HashMap;
use std::fmt::Debug;
/// Headers collected from `-H` / `--header` options.
///
/// Every key and value is a slice borrowed from the parsed input (`'a`), so a
/// `Header` cannot outlive the string it was parsed from.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Header<'a> {
    /// Cookie name/value pairs taken from the `Cookie` header.
    pub options_header_cookies: HashMap<&'a str, &'a str>,
    /// Every other header, keyed by header name.
    pub options_headers_more: HashMap<&'a str, &'a str>,
}
#[allow(dead_code)]
fn key(input: &str) -> IResult<&str, &str> {
input.split_at_position_complete(|item| {
!(item.is_alphanum() || item.as_char() == '-' || item.as_char() == '_')
})
}
/// Parses a `;`-separated list of `key=value` cookie pairs into a map.
///
/// Whitespace before the key and after the value is skipped. Returns the
/// remaining input together with the collected pairs.
#[allow(dead_code)]
fn cookie_pairs(input: &str) -> IResult<&str, HashMap<&str, &str>> {
let (input, cookies) = separated_list0(
tag(";"),
tuple((
multispace0,
key,
tag("="),
// NOTE(review): `alt` only falls back to `take_until("'")` when
// `take_until(";")` fails. If a `;` occurs anywhere later in the input,
// even after the closing quote, the value runs all the way to that `;`.
alt((take_until(";"), take_until("'"))),
multispace0,
)),
)(input)?;
// Keep only the key (tuple slot 1) and the value (tuple slot 3).
Ok((input, cookies.into_iter().map(|c| (c.1, c.3)).collect()))
}
#[allow(dead_code)]
fn options_header_cookie(input: &str) -> IResult<&str, HashMap<&str, &str>> {
let (input, (_, _, cookies, _)) = tuple((
alt((tag("-H\x20"), tag("--header\x20"))),
tag_no_case("'cookie:\x20"),
cookie_pairs,
tag("'"),
))(input)?;
Ok((input, cookies))
}
#[allow(dead_code)]
fn options_header_(input: &str) -> IResult<&str, (&str, &str)> {
let (input, (_, k, _, v, _)) = tuple((
alt((tag("-H\x20'"), tag("--header\x20'"))),
key,
tag(":\x20"),
take_until("'"),
tag("'"),
))(input)?;
Ok((input, (k, v)))
}
/// Parses a list of header options, separated by a single space, into a `Header`.
///
/// Each entry may start with the two-character sequence `\n` (a backslash
/// followed by the letter `n`), then any whitespace, then optionally a cookie
/// header and optionally a generic header. A cookie header replaces any
/// previously stored cookie map; a generic header is recorded only when the
/// entry had no cookie header.
fn text(input: &str) -> IResult<&str, Header> {
    let (rest, entries) = separated_list0(
        tag("\x20"),
        tuple((
            opt(tag("\\n")),
            multispace0,
            tuple((opt(options_header_cookie), opt(options_header_))),
        )),
    )(input)?;

    let mut cookies = HashMap::new();
    let mut others = HashMap::new();
    for (_, _, (cookie_header, plain_header)) in entries {
        match (cookie_header, plain_header) {
            // The cookie header wins when both parsers matched.
            (Some(parsed), _) => cookies = parsed,
            (None, Some((name, value))) => {
                others.insert(name, value);
            }
            (None, None) => {}
        }
    }

    Ok((
        rest,
        Header {
            options_header_cookies: cookies,
            options_headers_more: others,
        },
    ))
}
/// Prints `o` to stdout in pretty `Debug` form, prefixed with `=> `.
#[allow(dead_code)]
fn debug<T>(o: T)
where
    T: Debug,
{
    println!("=> {:#?}", o);
}
/// Runs `text` on the two sample inputs and prints each result.
fn main() {
    // input1 differs from input2 only by the `;` before the final quote.
    let input1 = r#"
-H 'Cookie: NID=219=Ji47zdfV6mSKlkKmpVf8F67O80WTSw; DV=03-vBWQ2RBEqsNFUD5FEuieRJvkwrRfXaKa0v0Cj2wAAAAA' \
-H 'User-Agent: Mozilla/5.0 Macintosh;'"#;
    let input2 = r#"
-H 'Cookie: NID=219=Ji47zdfV6mSKlkKmpVf8F67O80WTSw; DV=03-vBWQ2RBEqsNFUD5FEuieRJvkwrRfXaKa0v0Cj2wAAAAA' \
-H 'User-Agent: Mozilla/5.0 Macintosh'"#;

    for &sample in [input1, input2].iter() {
        debug(text(sample));
    }
}
问题出在您的 `cookie_pairs()` 解析器上:
/// Parses a `;`-separated list of `key=value` cookie pairs into a map
/// (the original, problematic version from the question).
fn cookie_pairs(input: &str) -> IResult<&str, HashMap<&str, &str>> {
let (input, cookies) = separated_list0(
tag(";"),
tuple((
multispace0,
key,
tag("="),
// The culprit: `take_until("'")` is tried only if `take_until(";")` fails,
// so any later `;` in the input makes the value run up to that `;`.
alt((take_until(";"), take_until("'"))),
multispace0,
)),
)(input)?;
// Keep only the key (tuple slot 1) and the value (tuple slot 3).
Ok((input, cookies.into_iter().map(|c| (c.1, c.3)).collect()))
}
`alt()` 组合器会先运行第一个解析器,只有在它失败时才会尝试第二个:
alt((take_until(";"), take_until("'")))
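所以在后面的输入中存在尾随 `;` 的情况下(例如 input1 中 `Macintosh;` 里的分号),`take_until(";")` 不会失败,而是一直读到那个 `;` 为止,越过了 Cookie header 的结束引号。解析器基本上会消耗整个输入,从而使父解析器失败并且不返回任何 cookie。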
修复很简单。您需要将其替换为下面的写法(`take_while` 需要从 `nom::bytes::complete` 导入):
take_while(|ch| ch != '\'' && ch != ';')
它会在遇到 `'` 或 `;` 时停止,而不会消耗整个输入:
/// Parses a `;`-separated list of `key=value` cookie pairs into a map.
///
/// A value extends up to, but not including, the next `'` or `;`, so it stops
/// at the closing quote of the header instead of scanning ahead for a later
/// `;`. `take_while` is not in the question's import list; it is expected to
/// come from `nom::bytes::complete`.
fn cookie_pairs(input: &str) -> IResult<&str, HashMap<&str, &str>> {
    let (rest, pairs) = separated_list0(
        tag(";"),
        tuple((
            multispace0,
            key,
            tag("="),
            take_while(|ch| ch != '\'' && ch != ';'),
            multispace0,
        )),
    )(input)?;

    // Only the key and the value of each parsed tuple are kept.
    let mut cookies = HashMap::new();
    for (_, name, _, value, _) in pairs {
        cookies.insert(name, value);
    }
    Ok((rest, cookies))
}
我写了一些使用 nom 解析 HTTP headers 的代码,但得到了一些奇怪的结果,如下面的示例所示。 在 main 函数中,字符串 input1 只比 input2 在末尾多了一个分号,却得到了完全不同的结果。显然,我希望 text(input1) 得到与 text(input2) 类似的结果,感谢您帮助我检查发生了什么。
[dependencies]
nom = "6"
use nom::{
branch::alt,
bytes::complete::{tag, tag_no_case, take_until},
character::complete::multispace0,
combinator::opt,
multi::separated_list0,
sequence::tuple,
};
use nom::{AsChar, IResult, InputTakeAtPosition};
use std::collections::HashMap;
use std::fmt::Debug;
/// Headers collected from `-H` / `--header` options.
///
/// Every key and value is a slice borrowed from the parsed input (`'a`), so a
/// `Header` cannot outlive the string it was parsed from.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Header<'a> {
    /// Cookie name/value pairs taken from the `Cookie` header.
    pub options_header_cookies: HashMap<&'a str, &'a str>,
    /// Every other header, keyed by header name.
    pub options_headers_more: HashMap<&'a str, &'a str>,
}
#[allow(dead_code)]
fn key(input: &str) -> IResult<&str, &str> {
input.split_at_position_complete(|item| {
!(item.is_alphanum() || item.as_char() == '-' || item.as_char() == '_')
})
}
/// Parses a `;`-separated list of `key=value` cookie pairs into a map.
///
/// Whitespace before the key and after the value is skipped. Returns the
/// remaining input together with the collected pairs.
#[allow(dead_code)]
fn cookie_pairs(input: &str) -> IResult<&str, HashMap<&str, &str>> {
let (input, cookies) = separated_list0(
tag(";"),
tuple((
multispace0,
key,
tag("="),
// NOTE(review): `alt` only falls back to `take_until("'")` when
// `take_until(";")` fails. If a `;` occurs anywhere later in the input,
// even after the closing quote, the value runs all the way to that `;`.
alt((take_until(";"), take_until("'"))),
multispace0,
)),
)(input)?;
// Keep only the key (tuple slot 1) and the value (tuple slot 3).
Ok((input, cookies.into_iter().map(|c| (c.1, c.3)).collect()))
}
#[allow(dead_code)]
fn options_header_cookie(input: &str) -> IResult<&str, HashMap<&str, &str>> {
let (input, (_, _, cookies, _)) = tuple((
alt((tag("-H\x20"), tag("--header\x20"))),
tag_no_case("'cookie:\x20"),
cookie_pairs,
tag("'"),
))(input)?;
Ok((input, cookies))
}
#[allow(dead_code)]
fn options_header_(input: &str) -> IResult<&str, (&str, &str)> {
let (input, (_, k, _, v, _)) = tuple((
alt((tag("-H\x20'"), tag("--header\x20'"))),
key,
tag(":\x20"),
take_until("'"),
tag("'"),
))(input)?;
Ok((input, (k, v)))
}
/// Parses a list of header options, separated by a single space, into a `Header`.
///
/// Each entry may start with the two-character sequence `\n` (a backslash
/// followed by the letter `n`), then any whitespace, then optionally a cookie
/// header and optionally a generic header. A cookie header replaces any
/// previously stored cookie map; a generic header is recorded only when the
/// entry had no cookie header.
fn text(input: &str) -> IResult<&str, Header> {
    let (rest, entries) = separated_list0(
        tag("\x20"),
        tuple((
            opt(tag("\\n")),
            multispace0,
            tuple((opt(options_header_cookie), opt(options_header_))),
        )),
    )(input)?;

    let mut cookies = HashMap::new();
    let mut others = HashMap::new();
    for (_, _, (cookie_header, plain_header)) in entries {
        match (cookie_header, plain_header) {
            // The cookie header wins when both parsers matched.
            (Some(parsed), _) => cookies = parsed,
            (None, Some((name, value))) => {
                others.insert(name, value);
            }
            (None, None) => {}
        }
    }

    Ok((
        rest,
        Header {
            options_header_cookies: cookies,
            options_headers_more: others,
        },
    ))
}
/// Prints `o` to stdout in pretty `Debug` form, prefixed with `=> `.
#[allow(dead_code)]
fn debug<T>(o: T)
where
    T: Debug,
{
    println!("=> {:#?}", o);
}
/// Runs `text` on the two sample inputs and prints each result.
fn main() {
    // input1 differs from input2 only by the `;` before the final quote.
    let input1 = r#"
-H 'Cookie: NID=219=Ji47zdfV6mSKlkKmpVf8F67O80WTSw; DV=03-vBWQ2RBEqsNFUD5FEuieRJvkwrRfXaKa0v0Cj2wAAAAA' \
-H 'User-Agent: Mozilla/5.0 Macintosh;'"#;
    let input2 = r#"
-H 'Cookie: NID=219=Ji47zdfV6mSKlkKmpVf8F67O80WTSw; DV=03-vBWQ2RBEqsNFUD5FEuieRJvkwrRfXaKa0v0Cj2wAAAAA' \
-H 'User-Agent: Mozilla/5.0 Macintosh'"#;

    for &sample in [input1, input2].iter() {
        debug(text(sample));
    }
}
问题出在您的 `cookie_pairs()` 解析器上:
/// Parses a `;`-separated list of `key=value` cookie pairs into a map
/// (the original, problematic version from the question).
fn cookie_pairs(input: &str) -> IResult<&str, HashMap<&str, &str>> {
let (input, cookies) = separated_list0(
tag(";"),
tuple((
multispace0,
key,
tag("="),
// The culprit: `take_until("'")` is tried only if `take_until(";")` fails,
// so any later `;` in the input makes the value run up to that `;`.
alt((take_until(";"), take_until("'"))),
multispace0,
)),
)(input)?;
// Keep only the key (tuple slot 1) and the value (tuple slot 3).
Ok((input, cookies.into_iter().map(|c| (c.1, c.3)).collect()))
}
`alt()` 组合器会先运行第一个解析器,只有在它失败时才会尝试第二个:
alt((take_until(";"), take_until("'")))
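所以在后面的输入中存在尾随 `;` 的情况下(例如 input1 中 `Macintosh;` 里的分号),`take_until(";")` 不会失败,而是一直读到那个 `;` 为止,越过了 Cookie header 的结束引号。解析器基本上会消耗整个输入,从而使父解析器失败并且不返回任何 cookie。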
修复很简单。您需要将其替换为下面的写法(`take_while` 需要从 `nom::bytes::complete` 导入):
take_while(|ch| ch != '\'' && ch != ';')
它会在遇到 `'` 或 `;` 时停止,而不会消耗整个输入:
/// Parses a `;`-separated list of `key=value` cookie pairs into a map.
///
/// A value extends up to, but not including, the next `'` or `;`, so it stops
/// at the closing quote of the header instead of scanning ahead for a later
/// `;`. `take_while` is not in the question's import list; it is expected to
/// come from `nom::bytes::complete`.
fn cookie_pairs(input: &str) -> IResult<&str, HashMap<&str, &str>> {
    let (rest, pairs) = separated_list0(
        tag(";"),
        tuple((
            multispace0,
            key,
            tag("="),
            take_while(|ch| ch != '\'' && ch != ';'),
            multispace0,
        )),
    )(input)?;

    // Only the key and the value of each parsed tuple are kept.
    let mut cookies = HashMap::new();
    for (_, name, _, value, _) in pairs {
        cookies.insert(name, value);
    }
    Ok((rest, cookies))
}