boost::spirit::qi::lexeme 没有捕获完整的标记
boost::spirit::qi::lexeme not capturing complete token
我正在尝试解析包含连字符的逗号分隔标记。但是 lexeme
会忽略所有连字符。部分程序如下
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/support_utree.hpp>
namespace qi = boost::spirit::qi;
namespace bs = boost::spirit;
// Grammar for a comma-separated list of tokens. The skipper is
// bs::ascii::space_type and the synthesized attribute is a bs::utree.
template<typename Iterator>
struct my_grammar : public qi::grammar<Iterator,bs::utree(),bs::ascii::space_type>
{
// Registers `start` as the entry rule and passes "MY" as the grammar name.
my_grammar() : my_grammar::base_type(start,"MY")
{
// One or more tokens separated by ','.
start = token % ',';
// Unary + binds tighter than %, so this reads as (+alnum) % char_('-'):
// runs of alphanumerics separated by '-'.
// NOTE(review): the list operator does not add the separator to the
// attribute, which would explain the hyphens missing from each token.
token = qi::lexeme[ +qi::alnum % qi::char_('-') ];
}
// Entry rule; declared with the ASCII space skipper.
qi::rule<Iterator,bs::utree(),bs::ascii::space_type> start;
// Single token; declared without a skipper type, attribute is std::string.
qi::rule<Iterator,std::string()> token;
};
template<typename Iterator>
bool parse(Iterator & begin,Iterator end,my_grammar<Iterator> const & grammar)
{
bs::utree a;
auto r = qi::phrase_parse(begin,end,grammar,bs::ascii::space,a);
std::cout<<a<<'\n';
return r;
}
int main()
{
std::string input = "i-j-k,l-m-n,p3-14 ,5jhjj-kkk";
auto it = input.begin();
my_grammar<decltype(it)> g;
if(::parse(it,input.end(),g))
{
std::cout<<"parse success\n";
}
else
{
std::cout<<"parse failed\n";
}
std::cout<<"Unparsed input => "<< std::string{it,input.end()}<<'\n';
}
+qi::alnum % qi::char_('-')
这会匹配一个或多个由“-”分隔的字母数字字符序列。根据文档,这正是它的行为。因此,你不应该期望连字符成为结果属性的一部分。
使用
+(qi::alnum | char_('-'))
来代替。或者
+qi::char_("-A-Za-z0-9")
或者在上下文中,甚至:
token = qi::raw[ qi::lexeme[+(qi::alnum | '-')] ];
#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/support_utree.hpp>
namespace qi = boost::spirit::qi;
namespace bs = boost::spirit;
// Grammar for a comma-separated list of tokens in which '-' is an ordinary
// token character. `Result` is the synthesized attribute type and defaults to
// std::vector<std::string>; the skipper is bs::ascii::space_type.
template <typename Iterator, typename Result = std::vector<std::string> > struct my_grammar : public qi::grammar<Iterator, Result(), bs::ascii::space_type> {
// Registers `start` as the entry rule and passes "MY" as the grammar name.
my_grammar() : my_grammar::base_type(start, "MY") {
// One or more tokens separated by ','.
start = token % ',';
// Accepts one or more alphanumerics or '-'. raw[] exposes the matched
// source range, so the token attribute is the input text as written.
token = qi::raw[ qi::lexeme[+(qi::alnum | '-')] ];
// Registers both rules for Spirit's debug tracing.
BOOST_SPIRIT_DEBUG_NODES((start)(token))
}
// Entry rule; declared with the ASCII space skipper.
qi::rule<Iterator, Result(), bs::ascii::space_type> start;
// Single token; declared without a skipper type, attribute is std::string.
qi::rule<Iterator, std::string()> token;
};
template <typename Iterator> bool parse(Iterator &begin, Iterator end, my_grammar<Iterator> const &grammar) {
std::vector<std::string> parsed;
auto r = qi::phrase_parse(begin, end, grammar, bs::ascii::space, parsed);
for (auto& el : parsed)
std::cout << el << " ";
std::cout << '\n';
return r;
}
int main() {
std::string input = "i-j-k,l-m-n,p3-14 ,5jhjj-kkk";
auto it = input.begin();
my_grammar<decltype(it)> g;
if (::parse(it, input.end(), g)) {
std::cout << "parse success\n";
} else {
std::cout << "parse failed\n";
}
std::cout << "Unparsed input => " << std::string{ it, input.end() } << '\n';
}
输出:
i-j-k l-m-n p3-14 5jhjj-kkk
parse success
Unparsed input =>
启用调试:
<start>
<try>i-j-k,l-m-n,p3-14 ,5</try>
<token>
<try>i-j-k,l-m-n,p3-14 ,5</try>
<success>,l-m-n,p3-14 ,5jhjj-</success>
<attributes>[[i, -, j, -, k]]</attributes>
</token>
<token>
<try>l-m-n,p3-14 ,5jhjj-k</try>
<success>,p3-14 ,5jhjj-kkk</success>
<attributes>[[l, -, m, -, n]]</attributes>
</token>
<token>
<try>p3-14 ,5jhjj-kkk</try>
<success> ,5jhjj-kkk</success>
<attributes>[[p, 3, -, 1, 4]]</attributes>
</token>
<token>
<try>5jhjj-kkk</try>
<success></success>
<attributes>[[5, j, h, j, j, -, k, k, k]]</attributes>
</token>
<success></success>
<attributes>[[[i, -, j, -, k], [l, -, m, -, n], [p, 3, -, 1, 4], [5, j, h, j, j, -, k, k, k]]]</attributes>
</start>
我正在尝试解析包含连字符的逗号分隔标记。但是 lexeme
会忽略所有连字符。部分程序如下
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/support_utree.hpp>
namespace qi = boost::spirit::qi;
namespace bs = boost::spirit;
// Grammar for a comma-separated list of tokens. The skipper is
// bs::ascii::space_type and the synthesized attribute is a bs::utree.
template<typename Iterator>
struct my_grammar : public qi::grammar<Iterator,bs::utree(),bs::ascii::space_type>
{
// Registers `start` as the entry rule and passes "MY" as the grammar name.
my_grammar() : my_grammar::base_type(start,"MY")
{
// One or more tokens separated by ','.
start = token % ',';
// Unary + binds tighter than %, so this reads as (+alnum) % char_('-'):
// runs of alphanumerics separated by '-'.
// NOTE(review): the list operator does not add the separator to the
// attribute, which would explain the hyphens missing from each token.
token = qi::lexeme[ +qi::alnum % qi::char_('-') ];
}
// Entry rule; declared with the ASCII space skipper.
qi::rule<Iterator,bs::utree(),bs::ascii::space_type> start;
// Single token; declared without a skipper type, attribute is std::string.
qi::rule<Iterator,std::string()> token;
};
template<typename Iterator>
bool parse(Iterator & begin,Iterator end,my_grammar<Iterator> const & grammar)
{
bs::utree a;
auto r = qi::phrase_parse(begin,end,grammar,bs::ascii::space,a);
std::cout<<a<<'\n';
return r;
}
int main()
{
std::string input = "i-j-k,l-m-n,p3-14 ,5jhjj-kkk";
auto it = input.begin();
my_grammar<decltype(it)> g;
if(::parse(it,input.end(),g))
{
std::cout<<"parse success\n";
}
else
{
std::cout<<"parse failed\n";
}
std::cout<<"Unparsed input => "<< std::string{it,input.end()}<<'\n';
}
+qi::alnum % qi::char_('-')
这会匹配一个或多个由“-”分隔的字母数字字符序列。根据文档,这正是它的行为。因此,你不应该期望连字符成为结果属性的一部分。
使用
+(qi::alnum | char_('-'))
来代替。或者
+qi::char_("-A-Za-z0-9")
或者在上下文中,甚至:
token = qi::raw[ qi::lexeme[+(qi::alnum | '-')] ];
#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/support_utree.hpp>
namespace qi = boost::spirit::qi;
namespace bs = boost::spirit;
// Grammar for a comma-separated list of tokens in which '-' is an ordinary
// token character. `Result` is the synthesized attribute type and defaults to
// std::vector<std::string>; the skipper is bs::ascii::space_type.
template <typename Iterator, typename Result = std::vector<std::string> > struct my_grammar : public qi::grammar<Iterator, Result(), bs::ascii::space_type> {
// Registers `start` as the entry rule and passes "MY" as the grammar name.
my_grammar() : my_grammar::base_type(start, "MY") {
// One or more tokens separated by ','.
start = token % ',';
// Accepts one or more alphanumerics or '-'. raw[] exposes the matched
// source range, so the token attribute is the input text as written.
token = qi::raw[ qi::lexeme[+(qi::alnum | '-')] ];
// Registers both rules for Spirit's debug tracing.
BOOST_SPIRIT_DEBUG_NODES((start)(token))
}
// Entry rule; declared with the ASCII space skipper.
qi::rule<Iterator, Result(), bs::ascii::space_type> start;
// Single token; declared without a skipper type, attribute is std::string.
qi::rule<Iterator, std::string()> token;
};
template <typename Iterator> bool parse(Iterator &begin, Iterator end, my_grammar<Iterator> const &grammar) {
std::vector<std::string> parsed;
auto r = qi::phrase_parse(begin, end, grammar, bs::ascii::space, parsed);
for (auto& el : parsed)
std::cout << el << " ";
std::cout << '\n';
return r;
}
int main() {
std::string input = "i-j-k,l-m-n,p3-14 ,5jhjj-kkk";
auto it = input.begin();
my_grammar<decltype(it)> g;
if (::parse(it, input.end(), g)) {
std::cout << "parse success\n";
} else {
std::cout << "parse failed\n";
}
std::cout << "Unparsed input => " << std::string{ it, input.end() } << '\n';
}
输出:
i-j-k l-m-n p3-14 5jhjj-kkk
parse success
Unparsed input =>
启用调试:
<start>
<try>i-j-k,l-m-n,p3-14 ,5</try>
<token>
<try>i-j-k,l-m-n,p3-14 ,5</try>
<success>,l-m-n,p3-14 ,5jhjj-</success>
<attributes>[[i, -, j, -, k]]</attributes>
</token>
<token>
<try>l-m-n,p3-14 ,5jhjj-k</try>
<success>,p3-14 ,5jhjj-kkk</success>
<attributes>[[l, -, m, -, n]]</attributes>
</token>
<token>
<try>p3-14 ,5jhjj-kkk</try>
<success> ,5jhjj-kkk</success>
<attributes>[[p, 3, -, 1, 4]]</attributes>
</token>
<token>
<try>5jhjj-kkk</try>
<success></success>
<attributes>[[5, j, h, j, j, -, k, k, k]]</attributes>
</token>
<success></success>
<attributes>[[[i, -, j, -, k], [l, -, m, -, n], [p, 3, -, 1, 4], [5, j, h, j, j, -, k, k, k]]]</attributes>
</start>