Boost.Spirit.Qi 备选(alternative,`|`)解析器问题
Boost.Spirit.Qi alternative ( | ) parser issue
我正在编写一个 Qi 解析器来解析 IRC 消息,其语法转录自 RFC 2812。语法中有一个再普通不过的备选(alternative)结构:
// hostname: one shortname followed by zero or more '.'-separated shortnames.
auto const hostname = shortname >> *('.' >> shortname);
// nickUserHost: a nickname, optionally followed by an optional "!user" part
// and then a mandatory "@host" part.
auto const nickUserHost = nickname >> -(-('!' >> user) >> '@' >> host);
// prefix: ordered alternative; hostname is listed first, nickUserHost second.
auto const prefix = hostname | nickUserHost;
我很困惑地发现,我的测试字符串("D-z!D-z@mib-A3A026FF.rev.sfr.net")能匹配 nickUserHost,却不能匹配 prefix。
我注意到的唯一值得一提的地方是:nickUserHost 中的 host 本身是根据 hostname 定义的,但我不确定这会如何影响解析。
您的测试字符串的开头("D-z")可以匹配 hostname,因此备选解析器在第一个分支就成功了,不会再尝试 nickUserHost,只是留下了未消耗的输入。通过在解析器后附加 >> eoi,可以在未到达输入末尾时显式地使解析失败。
#include <string>
#include <iostream>
#include <iomanip>
#include <boost/spirit/include/qi.hpp>
namespace qi = boost::spirit::qi;
// Parses a fixed sample IRC prefix with `expr` and prints, under the label
// `name`, whether the parse succeeded. When the parser stopped before the end
// of the sample, the unconsumed tail is printed on a second line.
template <typename Expr>
void test(std::string name, Expr const& expr) {
    std::string const input = "D-z!D-z@mib-A3A026FF.rev.sfr.net";

    // qi::parse advances `cursor` past whatever the parser consumed.
    auto cursor = input.begin();
    auto const stop = input.end();
    bool const matched = qi::parse(cursor, stop, expr);

    std::cout << name << ": " << matched << "\n";

    // Everything was consumed: nothing more to report.
    if (cursor == stop)
        return;

    std::cout << " -- remaining input: '" << std::string(cursor, stop) << "'\n";
}
// Builds the part of the RFC 2812 grammar needed for a message prefix and
// reports, for three of its rules, whether the sample input parses and how
// much of it is left unconsumed.
int main() {
    // Every rule is wrapped in qi::copy(): composite Spirit expressions keep
    // their operands by reference, so binding them directly to `auto` would
    // leave the rule referring to temporaries that are destroyed at the end of
    // the declaration (undefined behaviour when the rule is used later).
    auto const hexdigit = qi::copy(qi::char_("0123456789ABCDEF"));
    // Character set built from the ranges 0x5B-0x60 and 0x7B-0x7D.
    auto const special = qi::copy(qi::char_("\x5b-\x60\x7b-\x7d"));
    auto const oneToThreeDigits = qi::copy(qi::repeat(1, 3)[qi::digit]);
    auto const ip4addr = qi::copy(oneToThreeDigits >> '.' >> oneToThreeDigits >> '.' >> oneToThreeDigits >> '.' >> oneToThreeDigits);
    // RFC 2812: ip6addr = 1*hexdigit 7( ":" 1*hexdigit ), so the '+' applies to
    // the leading hexdigit only, not to the whole sequence.
    auto const ip6addr = qi::copy((+hexdigit >> qi::repeat(7)[':' >> +hexdigit]) | ("0:0:0:0:0:" >> (qi::lit('0') | "FFFF") >> ':' >> ip4addr));
    auto const hostaddr = qi::copy(ip4addr | ip6addr);
    // One leading letter/special followed by at most eight further characters.
    auto const nickname = qi::copy((qi::alpha | special) >> qi::repeat(0, 8)[qi::alnum | special | '-']);
    // One or more characters other than CR, LF, space and '@'.
    auto const user = qi::copy(+(~qi::char_("\x0d\x0a\x20\x40")));
    auto const shortname = qi::copy(qi::alnum >> *(qi::alnum | '-'));
    auto const hostname = qi::copy(shortname >> *('.' >> shortname));
    auto const host = qi::copy(hostname | hostaddr);
    auto const nickUserHost = qi::copy(nickname >> -(-('!' >> user) >> '@' >> host));
    // The problematic alternative: hostname is tried first and succeeds on the
    // leading "D-z" of the sample, so nickUserHost is never attempted.
    auto const prefix = qi::copy(hostname | nickUserHost);

    std::cout << std::boolalpha;
    test("hostname", hostname);
    test("nickUserHost", nickUserHost);
    test("prefix", prefix);
}
输出:
hostname: true
-- remaining input: '!D-z@mib-A3A026FF.rev.sfr.net'
nickUserHost: true
prefix: true
-- remaining input: '!D-z@mib-A3A026FF.rev.sfr.net'
我正在编写一个 Qi 解析器来解析 IRC 消息,其语法转录自 RFC 2812。语法中有一个再普通不过的备选(alternative)结构:
// hostname: one shortname followed by zero or more '.'-separated shortnames.
auto const hostname = shortname >> *('.' >> shortname);
// nickUserHost: a nickname, optionally followed by an optional "!user" part
// and then a mandatory "@host" part.
auto const nickUserHost = nickname >> -(-('!' >> user) >> '@' >> host);
// prefix: ordered alternative; hostname is listed first, nickUserHost second.
auto const prefix = hostname | nickUserHost;
我很困惑地发现,我的测试字符串("D-z!D-z@mib-A3A026FF.rev.sfr.net")能匹配 nickUserHost,却不能匹配 prefix。
我注意到的唯一值得一提的地方是:nickUserHost 中的 host 本身是根据 hostname 定义的,但我不确定这会如何影响解析。
您的测试字符串的开头("D-z")可以匹配 hostname,因此备选解析器在第一个分支就成功了,不会再尝试 nickUserHost,只是留下了未消耗的输入。通过在解析器后附加 >> eoi,可以在未到达输入末尾时显式地使解析失败。
#include <string>
#include <iostream>
#include <iomanip>
#include <boost/spirit/include/qi.hpp>
namespace qi = boost::spirit::qi;
// Parses a fixed sample IRC prefix with `expr` and prints, under the label
// `name`, whether the parse succeeded. When the parser stopped before the end
// of the sample, the unconsumed tail is printed on a second line.
template <typename Expr>
void test(std::string name, Expr const& expr) {
    std::string const input = "D-z!D-z@mib-A3A026FF.rev.sfr.net";

    // qi::parse advances `cursor` past whatever the parser consumed.
    auto cursor = input.begin();
    auto const stop = input.end();
    bool const matched = qi::parse(cursor, stop, expr);

    std::cout << name << ": " << matched << "\n";

    // Everything was consumed: nothing more to report.
    if (cursor == stop)
        return;

    std::cout << " -- remaining input: '" << std::string(cursor, stop) << "'\n";
}
// Builds the part of the RFC 2812 grammar needed for a message prefix and
// reports, for three of its rules, whether the sample input parses and how
// much of it is left unconsumed.
int main() {
    // Every rule is wrapped in qi::copy(): composite Spirit expressions keep
    // their operands by reference, so binding them directly to `auto` would
    // leave the rule referring to temporaries that are destroyed at the end of
    // the declaration (undefined behaviour when the rule is used later).
    auto const hexdigit = qi::copy(qi::char_("0123456789ABCDEF"));
    // Character set built from the ranges 0x5B-0x60 and 0x7B-0x7D.
    auto const special = qi::copy(qi::char_("\x5b-\x60\x7b-\x7d"));
    auto const oneToThreeDigits = qi::copy(qi::repeat(1, 3)[qi::digit]);
    auto const ip4addr = qi::copy(oneToThreeDigits >> '.' >> oneToThreeDigits >> '.' >> oneToThreeDigits >> '.' >> oneToThreeDigits);
    // RFC 2812: ip6addr = 1*hexdigit 7( ":" 1*hexdigit ), so the '+' applies to
    // the leading hexdigit only, not to the whole sequence.
    auto const ip6addr = qi::copy((+hexdigit >> qi::repeat(7)[':' >> +hexdigit]) | ("0:0:0:0:0:" >> (qi::lit('0') | "FFFF") >> ':' >> ip4addr));
    auto const hostaddr = qi::copy(ip4addr | ip6addr);
    // One leading letter/special followed by at most eight further characters.
    auto const nickname = qi::copy((qi::alpha | special) >> qi::repeat(0, 8)[qi::alnum | special | '-']);
    // One or more characters other than CR, LF, space and '@'.
    auto const user = qi::copy(+(~qi::char_("\x0d\x0a\x20\x40")));
    auto const shortname = qi::copy(qi::alnum >> *(qi::alnum | '-'));
    auto const hostname = qi::copy(shortname >> *('.' >> shortname));
    auto const host = qi::copy(hostname | hostaddr);
    auto const nickUserHost = qi::copy(nickname >> -(-('!' >> user) >> '@' >> host));
    // The problematic alternative: hostname is tried first and succeeds on the
    // leading "D-z" of the sample, so nickUserHost is never attempted.
    auto const prefix = qi::copy(hostname | nickUserHost);

    std::cout << std::boolalpha;
    test("hostname", hostname);
    test("nickUserHost", nickUserHost);
    test("prefix", prefix);
}
输出:
hostname: true
-- remaining input: '!D-z@mib-A3A026FF.rev.sfr.net'
nickUserHost: true
prefix: true
-- remaining input: '!D-z@mib-A3A026FF.rev.sfr.net'