在 boost::spirit::lex 中,如何添加具有语义操作和令牌 ID 的令牌?
In boost::spirit::lex, how do I add tokens with a semantic action and a token ID?
我知道如何添加带有标识符的标记定义:
this->self.add(identifier, ID_IDENTIFIER);
而且我知道如何使用语义操作添加标记定义:
this->self += whitespace [ lex::_pass = lex::pass_flags::pass_ignore ];
不幸的是,这不起作用:
this->self.add(whitespace
[ lex::_pass = lex::pass_flags::pass_ignore ],
ID_IDENTIFIER);
报错token无法转换成字符串(!?):
error C2664: 'const boost::spirit::lex::detail::lexer_def_>::adder &boost::spirit::lex::detail::lexer_def_>::adder::operator ()(wchar_t,unsigned int) const' : cannot convert argument 1 from 'const boost::proto::exprns_::expr' to 'const std::basic_string,std::allocator> &'
有趣的是,lexer.hpp 中的 adder 有一个 operator(),它将一个动作作为第三个参数——但它在我的 boost (1.55.0) 版本中被注释掉了。这在较新的版本中有效吗?
如果没有这个,我如何向词法分析器添加带有语义操作和 ID 的标记定义?
查看头文件似乎至少有两种可能的方法:
您可以使用 token_def 的 id 成员函数,在定义令牌之后设置 id:
ellipses = "\\.\\.\\.";
...
ellipses.id(ID_ELLIPSES);
您可以在定义令牌时使用 token_def 的双参数构造函数:
number = lex::token_def<>("[0-9]+", ID_NUMBER);
然后您可以像以前一样简单地添加语义操作:
this->self = ellipses[phx::ref(std::cout) << "Found ellipses.\n"] | '(' | ')' | number[phx::ref(std::cout) << "Found: " << phx::construct<std::string>(lex::_start, lex::_end) << '\n'];
下面的代码基于 Boost.Spirit.Lex 的 example3.cpp,稍作改动(改动处标有 //CHANGED),以实现您想要的效果。
#include <iostream>
#include <string>
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix.hpp>
using namespace boost::spirit;
namespace phx = boost::phoenix;
// Ids for the tokens that are given an explicit identifier. The values are
// consecutive and start one above lex::min_token_id.
enum token_id //ADDED
{
    ID_ELLIPSES = lex::min_token_id + 1,
    ID_NUMBER = ID_ELLIPSES + 1
};
///////////////////////////////////////////////////////////////////////////////
// Token definition
///////////////////////////////////////////////////////////////////////////////
// Token definitions for the example lexer.
//
// Gives the `ellipses` and `number` tokens explicit ids (ID_ELLIPSES and
// ID_NUMBER), attaches a semantic action to each of them, and registers a
// separate lexer state named "WS" that holds whitespace and C-style comments.
template <typename Lexer>
struct example3_tokens : lex::lexer<Lexer>
{
    example3_tokens()
    {
        // define the tokens to match
        //
        // The backslashes are doubled so that the regex engine (not the C++
        // compiler) receives "\." -- a literal dot. A single backslash before
        // '.' is not a recognized C++ escape sequence, and an unescaped '.'
        // is the regex wildcard rather than a literal dot.
        ellipses = "\\.\\.\\.";
        number = lex::token_def<>("[0-9]+", ID_NUMBER); //CHANGED
        ellipses.id(ID_ELLIPSES); //CHANGED

        // associate the tokens and the token set with the lexer; each
        // bracketed Phoenix expression is the semantic action of that token
        this->self
            = ellipses[phx::ref(std::cout) << "Found ellipses.\n"]
            | '(' | ')'
            | number[phx::ref(std::cout) << "Found: "
                     << phx::construct<std::string>(lex::_start, lex::_end) << '\n'] //CHANGED
            ;

        // define the whitespace to ignore (spaces, tabs, newlines and C-style
        // comments); as above, regex metacharacters are escaped with a doubled
        // backslash so the escape survives the C++ string literal
        this->self("WS")
            = lex::token_def<>("[ \t\n]+")              // whitespace
            | "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/"     // C style comments
            ;
    }

    // these tokens expose the iterator_range of the matched input sequence
    // (`identifier` is declared but is not assigned a pattern or added to the
    // lexer by the constructor above)
    lex::token_def<> ellipses, identifier, number;
};
///////////////////////////////////////////////////////////////////////////////
// Grammar definition
///////////////////////////////////////////////////////////////////////////////
// Token-level grammar for the example: the input is one or more items, each
// of which is either a couplet or an ellipses token. Tokens are matched by
// their id through qi::token(...), and the skipper is a lexer state
// (qi::in_state_skipper<Lexer>).
template <typename Iterator, typename Lexer>
struct example3_grammar
    : qi::grammar<Iterator, qi::in_state_skipper<Lexer> >
{
    // `tok` is accepted but not read here; the rules refer to tokens by id.
    template <typename TokenDef>
    example3_grammar(TokenDef const& tok)
        : example3_grammar::base_type(start)
    {
        // A couplet is either a single number token or a parenthesized group
        // containing one or more couplets, e.g.:
        //   (1) (1 2) (1 2 3) ...
        //   ((1)) ((1 2)(3 4)) (((1) (2 3) (1 2 (3) 4))) ...
        //   (((1))) ...
        couplet
            = qi::token(ID_NUMBER)                      //CHANGED
            | ('(' >> +couplet >> ')')
            ;

        // Top level: a non-empty sequence of couplets and ellipses tokens.
        start
            = +(couplet | qi::token(ID_ELLIPSES))       //CHANGED
            ;

        BOOST_SPIRIT_DEBUG_NODE(couplet);
        BOOST_SPIRIT_DEBUG_NODE(start);
    }

    qi::rule<Iterator, qi::in_state_skipper<Lexer> > start, couplet;
};
///////////////////////////////////////////////////////////////////////////////
int main()
{
// iterator type used to expose the underlying input stream
typedef std::string::iterator base_iterator_type;
// This is the token type to return from the lexer iterator
typedef lex::lexertl::token<base_iterator_type> token_type;
// This is the lexer type to use to tokenize the input.
// Here we use the lexertl based lexer engine.
typedef lex::lexertl::actor_lexer<token_type> lexer_type; //CHANGED
// This is the token definition type (derived from the given lexer type).
typedef example3_tokens<lexer_type> example3_tokens;
// this is the iterator type exposed by the lexer
typedef example3_tokens::iterator_type iterator_type;
// this is the type of the grammar to parse
typedef example3_grammar<iterator_type, example3_tokens::lexer_def> example3_grammar;
// now we use the types defined above to create the lexer and grammar
// object instances needed to invoke the parsing process
example3_tokens tokens; // Our lexer
example3_grammar calc(tokens); // Our parser
std::string str ="(1) (1 2) (1 2 3) ... ((1)) ((1 2)(3 4)) (((1) (2 3) (1 2 (3) 4))) ... (((1))) ..."; //CHANGED
// At this point we generate the iterator pair used to expose the
// tokenized input stream.
std::string::iterator it = str.begin();
iterator_type iter = tokens.begin(it, str.end());
iterator_type end = tokens.end();
// Parsing is done based on the token stream, not the character
// stream read from the input.
// Note how we use the lexer defined above as the skip parser.
bool r = qi::phrase_parse(iter, end, calc, qi::in_state("WS")[tokens.self]);
if (r && iter == end)
{
std::cout << "-------------------------\n";
std::cout << "Parsing succeeded\n";
std::cout << "-------------------------\n";
}
else
{
std::cout << "-------------------------\n";
std::cout << "Parsing failed\n";
std::cout << "-------------------------\n";
}
std::cout << "Bye... :-) \n\n";
return 0;
}
我知道如何添加带有标识符的标记定义:
this->self.add(identifier, ID_IDENTIFIER);
而且我知道如何使用语义操作添加标记定义:
this->self += whitespace [ lex::_pass = lex::pass_flags::pass_ignore ];
不幸的是,这不起作用:
this->self.add(whitespace
[ lex::_pass = lex::pass_flags::pass_ignore ],
ID_IDENTIFIER);
报错token无法转换成字符串(!?):
error C2664: 'const boost::spirit::lex::detail::lexer_def_>::adder &boost::spirit::lex::detail::lexer_def_>::adder::operator ()(wchar_t,unsigned int) const' : cannot convert argument 1 from 'const boost::proto::exprns_::expr' to 'const std::basic_string,std::allocator> &'
有趣的是,lexer.hpp 中的 adder 有一个 operator(),它将一个动作作为第三个参数——但它在我的 boost (1.55.0) 版本中被注释掉了。这在较新的版本中有效吗?
如果没有这个,我如何向词法分析器添加带有语义操作和 ID 的标记定义?
查看头文件似乎至少有两种可能的方法:
您可以使用 token_def 的 id 成员函数,在定义令牌之后设置 id:ellipses = "\\.\\.\\."; ... ellipses.id(ID_ELLIPSES);
您可以在定义令牌时使用 token_def 的双参数构造函数:number = lex::token_def<>("[0-9]+", ID_NUMBER);
然后您可以像以前一样简单地添加语义操作:
this->self = ellipses[phx::ref(std::cout) << "Found ellipses.\n"] | '(' | ')' | number[phx::ref(std::cout) << "Found: " << phx::construct<std::string>(lex::_start, lex::_end) << '\n'];
下面的代码基于 Boost.Spirit.Lex 的 example3.cpp,稍作改动(改动处标有 //CHANGED),以实现您想要的效果。
#include <iostream>
#include <string>
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix.hpp>
using namespace boost::spirit;
namespace phx = boost::phoenix;
// Ids for the tokens that are given an explicit identifier. The values are
// consecutive and start one above lex::min_token_id.
enum token_id //ADDED
{
    ID_ELLIPSES = lex::min_token_id + 1,
    ID_NUMBER = ID_ELLIPSES + 1
};
///////////////////////////////////////////////////////////////////////////////
// Token definition
///////////////////////////////////////////////////////////////////////////////
// Token definitions for the example lexer.
//
// Gives the `ellipses` and `number` tokens explicit ids (ID_ELLIPSES and
// ID_NUMBER), attaches a semantic action to each of them, and registers a
// separate lexer state named "WS" that holds whitespace and C-style comments.
template <typename Lexer>
struct example3_tokens : lex::lexer<Lexer>
{
    example3_tokens()
    {
        // define the tokens to match
        //
        // The backslashes are doubled so that the regex engine (not the C++
        // compiler) receives "\." -- a literal dot. A single backslash before
        // '.' is not a recognized C++ escape sequence, and an unescaped '.'
        // is the regex wildcard rather than a literal dot.
        ellipses = "\\.\\.\\.";
        number = lex::token_def<>("[0-9]+", ID_NUMBER); //CHANGED
        ellipses.id(ID_ELLIPSES); //CHANGED

        // associate the tokens and the token set with the lexer; each
        // bracketed Phoenix expression is the semantic action of that token
        this->self
            = ellipses[phx::ref(std::cout) << "Found ellipses.\n"]
            | '(' | ')'
            | number[phx::ref(std::cout) << "Found: "
                     << phx::construct<std::string>(lex::_start, lex::_end) << '\n'] //CHANGED
            ;

        // define the whitespace to ignore (spaces, tabs, newlines and C-style
        // comments); as above, regex metacharacters are escaped with a doubled
        // backslash so the escape survives the C++ string literal
        this->self("WS")
            = lex::token_def<>("[ \t\n]+")              // whitespace
            | "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/"     // C style comments
            ;
    }

    // these tokens expose the iterator_range of the matched input sequence
    // (`identifier` is declared but is not assigned a pattern or added to the
    // lexer by the constructor above)
    lex::token_def<> ellipses, identifier, number;
};
///////////////////////////////////////////////////////////////////////////////
// Grammar definition
///////////////////////////////////////////////////////////////////////////////
// Token-level grammar for the example: the input is one or more items, each
// of which is either a couplet or an ellipses token. Tokens are matched by
// their id through qi::token(...), and the skipper is a lexer state
// (qi::in_state_skipper<Lexer>).
template <typename Iterator, typename Lexer>
struct example3_grammar
    : qi::grammar<Iterator, qi::in_state_skipper<Lexer> >
{
    // `tok` is accepted but not read here; the rules refer to tokens by id.
    template <typename TokenDef>
    example3_grammar(TokenDef const& tok)
        : example3_grammar::base_type(start)
    {
        // A couplet is either a single number token or a parenthesized group
        // containing one or more couplets, e.g.:
        //   (1) (1 2) (1 2 3) ...
        //   ((1)) ((1 2)(3 4)) (((1) (2 3) (1 2 (3) 4))) ...
        //   (((1))) ...
        couplet
            = qi::token(ID_NUMBER)                      //CHANGED
            | ('(' >> +couplet >> ')')
            ;

        // Top level: a non-empty sequence of couplets and ellipses tokens.
        start
            = +(couplet | qi::token(ID_ELLIPSES))       //CHANGED
            ;

        BOOST_SPIRIT_DEBUG_NODE(couplet);
        BOOST_SPIRIT_DEBUG_NODE(start);
    }

    qi::rule<Iterator, qi::in_state_skipper<Lexer> > start, couplet;
};
///////////////////////////////////////////////////////////////////////////////
int main()
{
// iterator type used to expose the underlying input stream
typedef std::string::iterator base_iterator_type;
// This is the token type to return from the lexer iterator
typedef lex::lexertl::token<base_iterator_type> token_type;
// This is the lexer type to use to tokenize the input.
// Here we use the lexertl based lexer engine.
typedef lex::lexertl::actor_lexer<token_type> lexer_type; //CHANGED
// This is the token definition type (derived from the given lexer type).
typedef example3_tokens<lexer_type> example3_tokens;
// this is the iterator type exposed by the lexer
typedef example3_tokens::iterator_type iterator_type;
// this is the type of the grammar to parse
typedef example3_grammar<iterator_type, example3_tokens::lexer_def> example3_grammar;
// now we use the types defined above to create the lexer and grammar
// object instances needed to invoke the parsing process
example3_tokens tokens; // Our lexer
example3_grammar calc(tokens); // Our parser
std::string str ="(1) (1 2) (1 2 3) ... ((1)) ((1 2)(3 4)) (((1) (2 3) (1 2 (3) 4))) ... (((1))) ..."; //CHANGED
// At this point we generate the iterator pair used to expose the
// tokenized input stream.
std::string::iterator it = str.begin();
iterator_type iter = tokens.begin(it, str.end());
iterator_type end = tokens.end();
// Parsing is done based on the token stream, not the character
// stream read from the input.
// Note how we use the lexer defined above as the skip parser.
bool r = qi::phrase_parse(iter, end, calc, qi::in_state("WS")[tokens.self]);
if (r && iter == end)
{
std::cout << "-------------------------\n";
std::cout << "Parsing succeeded\n";
std::cout << "-------------------------\n";
}
else
{
std::cout << "-------------------------\n";
std::cout << "Parsing failed\n";
std::cout << "-------------------------\n";
}
std::cout << "Bye... :-) \n\n";
return 0;
}