为什么 boost::spirit::lex 会挂起,而不是报告解析错误?
Why does Boost.Spirit Lex hang instead of reporting a parse error?
我已经很久没有使用 boost::spirit 了,最近又重新用起来,结果卡在了一个很简单的用例上(天哪,有时我真想干掉这个库……为什么用 boost 完成这么简单的任务会如此复杂)。
#include <iostream>
#include <string>
#include <boost/bind.hpp>
#include <boost/ref.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace bs = boost::spirit;
namespace lex = boost::spirit::lex;
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
//---------------------------------------------------------------------------------
// configuration
// Type aliases that select the lexertl-based lexer: characters are read
// through std::string::iterator and tokens are lex::lexertl::token objects.
using base_iterator_type = std::string::iterator;
using token_type = lex::lexertl::token<base_iterator_type>;
using lexer_type = lex::lexertl::lexer<token_type>;
//---------------------------------------------------------------------------------
// Token definitions for the lexer.
// The default lexer state holds the keyword "class" and an identifier pattern;
// a separate state named "WS" holds one pattern for runs of spaces and tabs.
// None of the patterns defined here can match a digit at the start of a token.
template <typename Lexer>
struct cpp_tokens : lex::lexer<Lexer>
{
cpp_tokens()
{
class_ = "class";
identifier = "[a-zA-Z_][a-zA-Z0-9_]*";
// class_ is registered before identifier.
// NOTE(review): presumably so the keyword wins over the identifier pattern
// for the text "class" — confirm against the Spirit.Lex matching rules.
this->self += class_ | identifier;
// Whitespace is registered only in the "WS" state, not in the default state.
this->self("WS") = lex::token_def<>("[ \t]+");
}
// Keyword token for the literal "class".
lex::token_def<> class_;
// Identifier token; declared with a std::string attribute type.
lex::token_def<std::string> identifier;
};
// Concrete lexer type and the token-level iterator type it exposes.
using cpp_lex = cpp_tokens<lexer_type>;
using cpp_iterator = cpp_lex::iterator_type;
//---------------------------------------------------------------------------------
// Qi grammar that consumes the token stream produced by the lexer.
// It uses qi::in_state_skipper<Lexer> as its skipper type and has a single
// start rule, `program`, registered under the name "program".
template <typename Iterator, typename Lexer>
struct cpp_grammar_impl : bs::qi::grammar<Iterator, bs::qi::in_state_skipper<Lexer>>
{
// tok: the token-definition object; its `identifier` and `class_` members
// are used as parsers in the rule below.
template <typename TokenDef>
cpp_grammar_impl(TokenDef const& tok) : cpp_grammar_impl::base_type(program, "program")
{
// `program` matches an identifier token followed by the `class` token.
program = tok.identifier >> tok.class_;
}
private:
using skipper_type = bs::qi::in_state_skipper<Lexer>;
using simple_rule = qi::rule<Iterator, skipper_type>;
// Start rule of the grammar; it declares no attribute.
simple_rule program;
};
// Grammar instantiated for the token iterator and lexer definition of cpp_lex.
using cpp_grammar = cpp_grammar_impl<cpp_iterator, cpp_lex::lexer_def>;
//---------------------------------------------------------------------------------
// Entry point of the reproduction: lexes and parses the fixed input
// "111 class" and prints whether parsing succeeded.
int main()
{
// The input begins with digits.
std::string str("111 class");
cpp_lex cppLexer;
cpp_grammar cppGrammar(cppLexer);
// `it` is the character-level iterator; `iter` and `end` are token-level
// iterators obtained from the lexer.
auto it = str.begin();
cpp_iterator iter = cppLexer.begin(it, str.end());
cpp_iterator end = cppLexer.end();
// The skipper is built from the lexer's "WS" state.
bool r = qi::phrase_parse(iter, end, cppGrammar, bs::qi::in_state("WS")[cppLexer.self]);
// Success requires both a match and full consumption of the token stream.
if (r && iter == end)
{
std::cout << "-------------------------\n";
std::cout << "Parsing succeeded\n";
std::cout << "-------------------------\n";
}
else
{
// NOTE(review): this is the statement reported to hang. `iter` and `end` are
// token iterators, not character iterators, so building a std::string from
// them is suspect; the character iterators (`it`, `str.end()`) are the
// suggested replacement.
std::string rest(iter, end);
std::cout << "-------------------------\n";
std::cout << "Parsing failed\n";
std::cout << "stopped at: \"" << rest << "\"\n";
std::cout << "-------------------------\n";
}
std::cout << "Bye... :-) \n\n";
}
我预期会得到一个解析错误,但在上面的示例中 boost::spirit 却挂起了:它占满处理器并耗尽所有内存。示例见 Coliru。
我做错了什么以及如何解决?
使用调试器很容易发现,挂起的并不是解析过程,而是下面这一行:
std::string rest(iter, end);
我认为这更像是未定义行为(UB),因为 iter 和 end 这两个迭代器解引用得到的不是字符元素,而是词法标记(token)。
所以简单的解决方法是使用基本迭代器:
std::string rest(it, str.end());
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iostream>
#include <string>
namespace lex = boost::spirit::lex;
namespace qi = boost::spirit::qi;
//---------------------------------------------------------------------------------
// configuration
// Type aliases that select the lexertl-based lexer: characters are read
// through std::string::iterator and tokens are lex::lexertl::token objects.
using base_iterator_type = std::string::iterator;
using token_type = lex::lexertl::token<base_iterator_type>;
using lexer_type = lex::lexertl::lexer<token_type>;
//---------------------------------------------------------------------------------
// Token definitions for the lexer.
// The default lexer state holds the keyword "class" and an identifier pattern;
// a separate state named "WS" holds one pattern for runs of spaces and tabs.
// None of the patterns defined here can match a digit at the start of a token.
template <typename Lexer> struct cpp_tokens : lex::lexer<Lexer> {
cpp_tokens() {
class_ = "class";
identifier = "[a-zA-Z_][a-zA-Z0-9_]*";
// class_ is registered before identifier.
// NOTE(review): presumably so the keyword wins over the identifier pattern
// for the text "class" — confirm against the Spirit.Lex matching rules.
this->self += class_ | identifier;
// Whitespace is registered only in the "WS" state, not in the default state.
this->self("WS") = lex::token_def<>("[ \t]+");
}
// Keyword token for the literal "class".
lex::token_def<> class_;
// Identifier token; declared with a std::string attribute type.
lex::token_def<std::string> identifier;
};
// Concrete lexer type and the token-level iterator type it exposes.
using cpp_lex = cpp_tokens<lexer_type>;
using cpp_iterator = cpp_lex::iterator_type;
//---------------------------------------------------------------------------------
// Qi grammar that consumes the token stream produced by the lexer.
// It uses qi::in_state_skipper<Lexer> as its skipper type and has a single
// start rule, `program`, registered under the name "program".
template <typename Iterator, typename Lexer>
struct cpp_grammar_impl : qi::grammar<Iterator, qi::in_state_skipper<Lexer> > {
// tok: the token-definition object; its `identifier` and `class_` members
// are used as parsers in the rule below.
template <typename TokenDef>
cpp_grammar_impl(TokenDef const &tok) : cpp_grammar_impl::base_type(program, "program") {
// `program` matches an identifier token followed by the `class` token.
program = tok.identifier >> tok.class_;
}
private:
using skipper_type = qi::in_state_skipper<Lexer>;
using simple_rule = qi::rule<Iterator, skipper_type>;
// Start rule of the grammar; it declares no attribute.
simple_rule program;
};
// Grammar instantiated for the token iterator and lexer definition of cpp_lex.
using cpp_grammar = cpp_grammar_impl<cpp_iterator, cpp_lex::lexer_def>;
//---------------------------------------------------------------------------------
// Entry point: lexes and parses the fixed input "111 class" and reports the
// outcome. On failure the remaining input is printed from the character-level
// iterators (`it` .. `str.end()`), not from the token iterators.
int main() {
    std::string str("111 class");
    cpp_lex cppLexer;
    cpp_grammar cppGrammar(cppLexer);

    // `it` is the character iterator handed to the lexer; `iter`/`end` walk tokens.
    auto it = str.begin();
    cpp_iterator iter = cppLexer.begin(it, str.end());
    cpp_iterator end = cppLexer.end();

    const char* const separator = "-------------------------\n";

    // The skipper is built from the lexer's "WS" state.
    const bool r = qi::phrase_parse(iter, end, cppGrammar, qi::in_state("WS")[cppLexer.self]);

    // Success means the grammar matched and the whole token stream was consumed.
    if (!(r && iter == end)) {
        std::string rest(it, str.end());
        std::cout << separator
                  << "Parsing failed\n"
                  << "stopped at: \"" << rest << "\"\n"
                  << separator;
    } else {
        std::cout << separator
                  << "Parsing succeeded\n"
                  << separator;
    }

    std::cout << "Bye... :-) \n\n";
}
打印:
-------------------------
Parsing failed
stopped at: "111 class"
-------------------------
Bye... :-)
我已经很久没有使用 boost::spirit 了,最近又重新用起来,结果卡在了一个很简单的用例上(天哪,有时我真想干掉这个库……为什么用 boost 完成这么简单的任务会如此复杂)。
#include <iostream>
#include <string>
#include <boost/bind.hpp>
#include <boost/ref.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace bs = boost::spirit;
namespace lex = boost::spirit::lex;
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;
//---------------------------------------------------------------------------------
// configuration
// Type aliases that select the lexertl-based lexer: characters are read
// through std::string::iterator and tokens are lex::lexertl::token objects.
using base_iterator_type = std::string::iterator;
using token_type = lex::lexertl::token<base_iterator_type>;
using lexer_type = lex::lexertl::lexer<token_type>;
//---------------------------------------------------------------------------------
// Token definitions for the lexer.
// The default lexer state holds the keyword "class" and an identifier pattern;
// a separate state named "WS" holds one pattern for runs of spaces and tabs.
// None of the patterns defined here can match a digit at the start of a token.
template <typename Lexer>
struct cpp_tokens : lex::lexer<Lexer>
{
cpp_tokens()
{
class_ = "class";
identifier = "[a-zA-Z_][a-zA-Z0-9_]*";
// class_ is registered before identifier.
// NOTE(review): presumably so the keyword wins over the identifier pattern
// for the text "class" — confirm against the Spirit.Lex matching rules.
this->self += class_ | identifier;
// Whitespace is registered only in the "WS" state, not in the default state.
this->self("WS") = lex::token_def<>("[ \t]+");
}
// Keyword token for the literal "class".
lex::token_def<> class_;
// Identifier token; declared with a std::string attribute type.
lex::token_def<std::string> identifier;
};
// Concrete lexer type and the token-level iterator type it exposes.
using cpp_lex = cpp_tokens<lexer_type>;
using cpp_iterator = cpp_lex::iterator_type;
//---------------------------------------------------------------------------------
// Qi grammar that consumes the token stream produced by the lexer.
// It uses qi::in_state_skipper<Lexer> as its skipper type and has a single
// start rule, `program`, registered under the name "program".
template <typename Iterator, typename Lexer>
struct cpp_grammar_impl : bs::qi::grammar<Iterator, bs::qi::in_state_skipper<Lexer>>
{
// tok: the token-definition object; its `identifier` and `class_` members
// are used as parsers in the rule below.
template <typename TokenDef>
cpp_grammar_impl(TokenDef const& tok) : cpp_grammar_impl::base_type(program, "program")
{
// `program` matches an identifier token followed by the `class` token.
program = tok.identifier >> tok.class_;
}
private:
using skipper_type = bs::qi::in_state_skipper<Lexer>;
using simple_rule = qi::rule<Iterator, skipper_type>;
// Start rule of the grammar; it declares no attribute.
simple_rule program;
};
// Grammar instantiated for the token iterator and lexer definition of cpp_lex.
using cpp_grammar = cpp_grammar_impl<cpp_iterator, cpp_lex::lexer_def>;
//---------------------------------------------------------------------------------
// Entry point of the reproduction: lexes and parses the fixed input
// "111 class" and prints whether parsing succeeded.
int main()
{
// The input begins with digits.
std::string str("111 class");
cpp_lex cppLexer;
cpp_grammar cppGrammar(cppLexer);
// `it` is the character-level iterator; `iter` and `end` are token-level
// iterators obtained from the lexer.
auto it = str.begin();
cpp_iterator iter = cppLexer.begin(it, str.end());
cpp_iterator end = cppLexer.end();
// The skipper is built from the lexer's "WS" state.
bool r = qi::phrase_parse(iter, end, cppGrammar, bs::qi::in_state("WS")[cppLexer.self]);
// Success requires both a match and full consumption of the token stream.
if (r && iter == end)
{
std::cout << "-------------------------\n";
std::cout << "Parsing succeeded\n";
std::cout << "-------------------------\n";
}
else
{
// NOTE(review): this is the statement reported to hang. `iter` and `end` are
// token iterators, not character iterators, so building a std::string from
// them is suspect; the character iterators (`it`, `str.end()`) are the
// suggested replacement.
std::string rest(iter, end);
std::cout << "-------------------------\n";
std::cout << "Parsing failed\n";
std::cout << "stopped at: \"" << rest << "\"\n";
std::cout << "-------------------------\n";
}
std::cout << "Bye... :-) \n\n";
}
我预期会得到一个解析错误,但在上面的示例中 boost::spirit 却挂起了:它占满处理器并耗尽所有内存。示例见 Coliru。
我做错了什么以及如何解决?
使用调试器很容易发现,挂起的并不是解析过程,而是下面这一行:
std::string rest(iter, end);
我认为这更像是未定义行为(UB),因为 iter 和 end 这两个迭代器解引用得到的不是字符元素,而是词法标记(token)。
所以简单的解决方法是使用基本迭代器:
std::string rest(it, str.end());
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iostream>
#include <string>
namespace lex = boost::spirit::lex;
namespace qi = boost::spirit::qi;
//---------------------------------------------------------------------------------
// configuration
// Type aliases that select the lexertl-based lexer: characters are read
// through std::string::iterator and tokens are lex::lexertl::token objects.
using base_iterator_type = std::string::iterator;
using token_type = lex::lexertl::token<base_iterator_type>;
using lexer_type = lex::lexertl::lexer<token_type>;
//---------------------------------------------------------------------------------
// Token definitions for the lexer.
// The default lexer state holds the keyword "class" and an identifier pattern;
// a separate state named "WS" holds one pattern for runs of spaces and tabs.
// None of the patterns defined here can match a digit at the start of a token.
template <typename Lexer> struct cpp_tokens : lex::lexer<Lexer> {
cpp_tokens() {
class_ = "class";
identifier = "[a-zA-Z_][a-zA-Z0-9_]*";
// class_ is registered before identifier.
// NOTE(review): presumably so the keyword wins over the identifier pattern
// for the text "class" — confirm against the Spirit.Lex matching rules.
this->self += class_ | identifier;
// Whitespace is registered only in the "WS" state, not in the default state.
this->self("WS") = lex::token_def<>("[ \t]+");
}
// Keyword token for the literal "class".
lex::token_def<> class_;
// Identifier token; declared with a std::string attribute type.
lex::token_def<std::string> identifier;
};
// Concrete lexer type and the token-level iterator type it exposes.
using cpp_lex = cpp_tokens<lexer_type>;
using cpp_iterator = cpp_lex::iterator_type;
//---------------------------------------------------------------------------------
// Qi grammar that consumes the token stream produced by the lexer.
// It uses qi::in_state_skipper<Lexer> as its skipper type and has a single
// start rule, `program`, registered under the name "program".
template <typename Iterator, typename Lexer>
struct cpp_grammar_impl : qi::grammar<Iterator, qi::in_state_skipper<Lexer> > {
// tok: the token-definition object; its `identifier` and `class_` members
// are used as parsers in the rule below.
template <typename TokenDef>
cpp_grammar_impl(TokenDef const &tok) : cpp_grammar_impl::base_type(program, "program") {
// `program` matches an identifier token followed by the `class` token.
program = tok.identifier >> tok.class_;
}
private:
using skipper_type = qi::in_state_skipper<Lexer>;
using simple_rule = qi::rule<Iterator, skipper_type>;
// Start rule of the grammar; it declares no attribute.
simple_rule program;
};
// Grammar instantiated for the token iterator and lexer definition of cpp_lex.
using cpp_grammar = cpp_grammar_impl<cpp_iterator, cpp_lex::lexer_def>;
//---------------------------------------------------------------------------------
// Entry point: lexes and parses the fixed input "111 class" and reports the
// outcome. On failure the remaining input is printed from the character-level
// iterators (`it` .. `str.end()`), not from the token iterators.
int main() {
    std::string str("111 class");
    cpp_lex cppLexer;
    cpp_grammar cppGrammar(cppLexer);

    // `it` is the character iterator handed to the lexer; `iter`/`end` walk tokens.
    auto it = str.begin();
    cpp_iterator iter = cppLexer.begin(it, str.end());
    cpp_iterator end = cppLexer.end();

    const char* const separator = "-------------------------\n";

    // The skipper is built from the lexer's "WS" state.
    const bool r = qi::phrase_parse(iter, end, cppGrammar, qi::in_state("WS")[cppLexer.self]);

    // Success means the grammar matched and the whole token stream was consumed.
    if (!(r && iter == end)) {
        std::string rest(it, str.end());
        std::cout << separator
                  << "Parsing failed\n"
                  << "stopped at: \"" << rest << "\"\n"
                  << separator;
    } else {
        std::cout << separator
                  << "Parsing succeeded\n"
                  << separator;
    }

    std::cout << "Bye... :-) \n\n";
}
打印:
-------------------------
Parsing failed
stopped at: "111 class"
-------------------------
Bye... :-)