使用 Boost Spirit 解析命令语言
Parsing a command language using Boost Spirit
我正在为一种命令语言构建一个解析器,它是我从各种示例中拼凑而成的。我已经阅读了 Boost Spirit Qi 和 Lex 文档,我认为我了解基础知识,但根据我所阅读的内容,我应该避免属性并使用 utree。我在 utree 上找到的文档基本上很糟糕。鉴于下面的代码,我有以下问题:
- 如何注释解析器以使用 utree 创建 AST?
- 如何在构建后遍历 utree 以发现解析的内容?例如对于仅令牌命令,例如 SET DEBUG ON,以及带有值的命令,例如 LOAD "file.ext" 或 SET DRIVE C:
- 我想添加一个注释字符,“!”。那么,我怎样才能忽略之后的所有内容 - 除非它出现在带引号的字符串中?
- 为什么我的错误处理程序在我输入无效时没有被调用?
如何使命令标记不区分大小写,但不更改带引号的字符串的内容?
#include <Windows.h>
#include <conio.h>
#include <string>
#include <vector>
#include <iostream>
#define BOOST_SPIRIT_DEBUG
#include <boost\spirit\include\qi.hpp>
#include <boost\spirit\include\phoenix.hpp>
#include <boost\spirit\include\lex.hpp>
#include <boost\spirit\include\lex_lexertl.hpp>
using namespace std;
using namespace boost::spirit;
using boost::spirit::utree;
//
// Tokens used by the command grammar
//
template <typename Lexer>
struct command_tokens : lex::lexer <Lexer>
{
command_tokens () :
//
// Verbs, with abbreviation (just enough characters to make each unique)
//
boot ("B(O(O(T)?)?)?"),
exit ("E(X(I(T)?)?)?"),
help ("H(E(L(P)?)?)?"),
dash_help ("-H(E(L(P)?)?)?"),
slash_help ("\/H(E(L(P)?)?)?"),
load ("L(O(A(D)?)?)?"),
quit ("Q(U(I(T)?)?)?"),
set ("SE(T)?"),
show ("SH(O(W)?)?"),
//
// Nouns, with abbreviation (the minimum number of characters is usually 3, but may be more to ensure uniqueness)
//
debug ("DEB(U(G)?)?"),
drive ("DRI(V(E)?)?"),
trace ("TRA(C(E)?)?"),
//
// Qualifiers
//
on ("ON"),
off ("OFF"),
//
// Tokens to pass back to the grammar
//
quoted_string ("...")
{
using namespace boost::spirit::lex;
//
// Associate the tokens with the lexer
//
this->self
= boot
| exit
| help
| dash_help
| slash_help
| load
| quit
| set
| show
| debug
| drive
| trace
| off
| on
| quoted_string
;
//
// Define whitespace to ignore: space, tab, newline
//
this->self ("WS")
= lex::token_def <> ("[ \t\n]+")
;
}
lex::token_def <> boot;
lex::token_def <> dash_help;
lex::token_def <> debug;
lex::token_def <string> drive;
lex::token_def <> exit;
lex::token_def <> help;
lex::token_def <> load;
lex::token_def <> off;
lex::token_def <> on;
lex::token_def <> quit;
lex::token_def <string> quoted_string;
lex::token_def <> set;
lex::token_def <> show;
lex::token_def <> slash_help;
lex::token_def <> trace;
};
//
// Display parse error
//
struct error_handler_
{
template <typename, typename, typename>
struct result
{
typedef void type;
};
template <typename Iterator>
void operator ()
(
qi::info const& What,
Iterator Err_pos,
Iterator Last
) const
{
cout << "Error! Expecting "
<< What
<< " here: \""
<< string (Err_pos, Last)
<< "\""
<< endl;
}
};
boost::phoenix::function <error_handler_> const error_handler = error_handler_ ();
//
// Grammar describing the valid commands
//
template <typename Iterator, typename Lexer>
struct command_grammar : qi::grammar <Iterator>
{
template <typename Lexer>
command_grammar (command_tokens <Lexer> const& Tok) :
command_grammar::base_type (start)
{
using qi::on_error;
using qi::fail;
using qi::char_;
start
= +commands;
commands
= (
boot_command
| exit_command
| help_command
| load_command
| set_command
| show_command
);
boot_command
= Tok.boot;
exit_command
= Tok.exit
| Tok.quit;
help_command
= Tok.help
| Tok.dash_help
| Tok.slash_help;
load_command
= Tok.load >> Tok.quoted_string;
set_command
= Tok.set;
show_command
= Tok.show;
set_property
= debug_property
| drive_property
| trace_property;
debug_property
= Tok.debug >> on_off;
drive_property
= Tok.drive >> char_ ("A-Z") >> char_ (":");
trace_property
= Tok.trace >> on_off;
on_off
= Tok.on
| Tok.off;
BOOST_SPIRIT_DEBUG_NODE (start);
BOOST_SPIRIT_DEBUG_NODE (commands);
BOOST_SPIRIT_DEBUG_NODE (boot_command);
BOOST_SPIRIT_DEBUG_NODE (exit_command);
BOOST_SPIRIT_DEBUG_NODE (help_command);
BOOST_SPIRIT_DEBUG_NODE (load_command);
BOOST_SPIRIT_DEBUG_NODE (quit_command);
BOOST_SPIRIT_DEBUG_NODE (set_command);
BOOST_SPIRIT_DEBUG_NODE (show_command);
BOOST_SPIRIT_DEBUG_NODE (set_property);
BOOST_SPIRIT_DEBUG_NODE (debug_property);
BOOST_SPIRIT_DEBUG_NODE (drive_property);
BOOST_SPIRIT_DEBUG_NODE (trace_property);
BOOST_SPIRIT_DEBUG_NODE (target_property);
on_error <fail> (start, error_handler (_4, _3, _2));
}
qi::rule <Iterator> start;
qi::rule <Iterator> commands;
qi::rule <Iterator> boot_command;
qi::rule <Iterator> exit_command;
qi::rule <Iterator> help_command;
qi::rule <Iterator> load_command;
qi::rule <Iterator> quit_command;
qi::rule <Iterator> set_command;
qi::rule <Iterator> show_command;
qi::rule <Iterator> set_property;
qi::rule <Iterator> debug_property;
qi::rule <Iterator, string ()> drive_property;
qi::rule <Iterator> target_property;
qi::rule <Iterator> trace_property;
qi::rule <Iterator> on_off;
};
int
main
(
int Argc,
PCHAR Argv
)
{
typedef std::string::iterator base_iterator_type;
typedef lex::lexertl::token <base_iterator_type> token_type;
typedef lex::lexertl::lexer <token_type> lexer_type;
typedef command_tokens <lexer_type> command_tokens;
typedef command_tokens::iterator_type iterator_type;
typedef command_grammar <iterator_type, command_tokens::lexer_def> command_grammar;
command_tokens tokens;
command_grammar commands (tokens);
string input = "SET DRIVE C:";
string::iterator it = input.begin ();
iterator_type iter = tokens.begin (it, input.end ());
iterator_type end = tokens.end ();
string ws ("WS");
bool result = lex::tokenize_and_phrase_parse (it, input.end (), tokens, commands, qi::in_state (ws) [tokens.self]);
if (result)
{
cout << "Parse succeeded" << endl;
}
else
{
string rest (it, input.end ());
cout << "Parse failed" << endl;
cout << "Stopped at " << rest << endl;
}
return 0;
} // End of main
我将避开您的大部分代码,原因很简单,经验告诉我 Lex
和 utree
通常不是您想要使用的。
你想要的是定义一个 AST 来表示你的命令语言,然后想出一个语法来构建它。
AST
namespace Ast {
struct NoValue {
bool operator==(NoValue const &) const { return true; }
};
template <typename Tag> struct GenericCommand {};
namespace tag {
struct boot;
struct help;
struct load;
struct exit;
struct set;
struct show;
};
template <> struct GenericCommand<tag::load> { std::string name; };
template <> struct GenericCommand<tag::set> {
std::string property;
boost::variant<NoValue, std::string, bool> value; // optional
};
using BootCmd = GenericCommand<tag::boot>;
using HelpCmd = GenericCommand<tag::help>;
using ExitCmd = GenericCommand<tag::exit>;
using ShowCmd = GenericCommand<tag::show>;
using LoadCmd = GenericCommand<tag::load>;
using SetCmd = GenericCommand<tag::set>;
using Command = boost::variant<BootCmd, HelpCmd, ExitCmd, ShowCmd, LoadCmd, SetCmd>;
using Commands = std::list<Command>;
}
完整代码仅添加了调试输出助手。这是完整的融合改编:
BOOST_FUSION_ADAPT_TPL_STRUCT((Tag), (Ast::GenericCommand) (Tag), )
BOOST_FUSION_ADAPT_STRUCT(Ast::LoadCmd, name)
BOOST_FUSION_ADAPT_STRUCT(Ast::SetCmd, property, value)
语法
这里我做一些选择:
让我们把事情变白-space 并且不区分大小写,允许行分隔的命令:(另请参阅 Boost spirit skipper issues)
start = skip(blank) [lazy_command % eol];
让我们使用Nabialek Trick将命令与前缀关联起来。我使用了一段非常简单的代码来生成唯一的前缀:
std::set<std::string> const verbs { "boot", "exit", "help", "-help", "/help", "load", "quit", "set", "show", };
for (auto const full : verbs)
for (auto partial=full; partial.length(); partial.resize(partial.size()-1)) {
auto n = std::distance(verbs.lower_bound(partial), verbs.upper_bound(full));
if (n < 2) std::cout << "(\"" << partial << "\", &" << full << "_command)\n";
}
您可以对属性执行相同的操作,但我认为当前设置更简单:
template <typename Iterator>
struct command_grammar : qi::grammar<Iterator, Ast::Commands()> {
command_grammar() : command_grammar::base_type(start) {
using namespace qi;
start = skip(blank) [lazy_command % eol];
// nabialek trick
lazy_command = no_case [ commands [ _a = _1 ] > lazy(*_a) [ _val = _1 ] ];
on_off.add("on", true)("off", false);
commands.add
("-help", &help_command) ("-hel", &help_command) ("-he", &help_command) ("-h", &help_command)
("/help", &help_command) ("/hel", &help_command) ("/he", &help_command) ("/h", &help_command)
("help", &help_command) ("hel", &help_command) ("he", &help_command) ("h", &help_command)
("boot", &boot_command) ("boo", &boot_command) ("bo", &boot_command) ("b", &boot_command)
("exit", &exit_command) ("exi", &exit_command) ("ex", &exit_command) ("e", &exit_command)
("quit", &exit_command) ("qui", &exit_command) ("qu", &exit_command) ("q", &exit_command)
("load", &load_command) ("loa", &load_command) ("lo", &load_command) ("l", &load_command)
("set", &set_command) ("se", &set_command)
("show", &show_command) ("sho", &show_command) ("sh", &show_command);
quoted_string = '"' >> +~char_('"') >> '"';
// nullary commands
boot_command_ = eps;
exit_command_ = eps;
help_command_ = eps;
show_command_ = eps;
// non-nullary commands
load_command_ = quoted_string;
drive_ = char_("A-Z") >> ':';
set_command_ = no_case[lit("drive")|"driv"|"dri"|"dr"] >> attr("DRIVE") >> drive_
| no_case[ (lit("debug")|"debu"|"deb"|"de") >> attr("DEBUG") >> on_off ]
| no_case[ (lit("trace")|"trac"|"tra"|"tr"|"t") >> attr("TRACE") >> on_off ]
;
BOOST_SPIRIT_DEBUG_NODES(
(start)(lazy_command)
(boot_command) (exit_command) (help_command) (show_command) (set_command) (load_command)
(boot_command_)(exit_command_)(help_command_)(show_command_)(set_command_)(load_command_)
(quoted_string)(drive_)
)
on_error<fail>(start, error_handler_(_4, _3, _2));
on_error<fail>(lazy_command, error_handler_(_4, _3, _2));
boot_command = boot_command_;
exit_command = exit_command_;
help_command = help_command_;
load_command = load_command_;
exit_command = exit_command_;
set_command = set_command_;
show_command = show_command_;
}
private:
struct error_handler_t {
template <typename...> struct result { typedef void type; };
void operator()(qi::info const &What, Iterator Err_pos, Iterator Last) const {
std::cout << "Error! Expecting " << What << " here: \"" << std::string(Err_pos, Last) << "\"" << std::endl;
}
};
boost::phoenix::function<error_handler_t> const error_handler_ = error_handler_t {};
qi::rule<Iterator, Ast::Commands()> start;
using Skipper = qi::blank_type;
using CommandRule = qi::rule<Iterator, Ast::Command(), Skipper>;
qi::symbols<char, bool> on_off;
qi::symbols<char, CommandRule const*> commands;
qi::rule<Iterator, std::string()> drive_property, quoted_string, drive_;
qi::rule<Iterator, Ast::Command(), Skipper, qi::locals<CommandRule const*> > lazy_command;
CommandRule boot_command, exit_command, help_command, load_command, set_command, show_command;
qi::rule<Iterator, Ast::BootCmd(), Skipper> boot_command_;
qi::rule<Iterator, Ast::ExitCmd(), Skipper> exit_command_;
qi::rule<Iterator, Ast::HelpCmd(), Skipper> help_command_;
qi::rule<Iterator, Ast::LoadCmd(), Skipper> load_command_;
qi::rule<Iterator, Ast::SetCmd(), Skipper> set_command_;
qi::rule<Iterator, Ast::ShowCmd(), Skipper> show_command_;
};
测试用例
int main() {
typedef std::string::const_iterator It;
command_grammar<It> const commands;
for (std::string const input : {
"help",
"set drive C:",
"SET DRIVE C:",
"loAD \"XYZ\"",
"load \"anything \nat all\"",
// multiline
"load \"ABC\"\nhelp\n-he\n/H\nsh\nse t off\nse debug ON\nb\nq"
})
{
std::cout << "----- '" << input << "' -----\n";
It f = input.begin(), l = input.end();
Ast::Commands parsed;
bool result = parse(f, l, commands, parsed);
if (result) {
for (auto& cmd : parsed) {
std::cout << "Parsed " << cmd << "\n";
}
} else {
std::cout << "Parse failed\n";
}
if (f != l) {
std::cout << "Remaining unparsed '" << std::string(f, l) << "'\n";
}
}
}
打印:
----- 'help' -----
Parsed HELP ()
----- 'set drive C:' -----
Parsed SET (DRIVE C)
----- 'SET DRIVE C:' -----
Parsed SET (DRIVE C)
----- 'loAD "XYZ"' -----
Parsed LOAD (XYZ)
----- 'load "anything
at all"' -----
Parsed LOAD (anything
at all)
----- 'load "ABC"
help
-he
/H
sh
se t off
se debug ON
b
q' -----
Parsed LOAD (ABC)
Parsed HELP ()
Parsed HELP ()
Parsed HELP ()
Parsed SHOW ()
Parsed SET (TRACE 0)
Parsed SET (DEBUG 1)
Parsed BOOT ()
Parsed EXIT ()
完整列表
//#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/include/io.hpp>
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
namespace qi = boost::spirit::qi;
namespace Ast {
struct NoValue {
bool operator==(NoValue const &) const { return true; }
friend std::ostream& operator<<(std::ostream& os, NoValue) { return os; }
};
template <typename Tag> struct GenericCommand {};
namespace tag {
struct boot {};
struct help {};
struct load {};
struct exit {};
struct set {};
struct show {};
static std::ostream& operator<<(std::ostream& os, boot) { return os << "BOOT"; }
static std::ostream& operator<<(std::ostream& os, help) { return os << "HELP"; }
static std::ostream& operator<<(std::ostream& os, load) { return os << "LOAD"; }
static std::ostream& operator<<(std::ostream& os, exit) { return os << "EXIT"; }
static std::ostream& operator<<(std::ostream& os, set ) { return os << "SET"; }
static std::ostream& operator<<(std::ostream& os, show) { return os << "SHOW"; }
};
template <> struct GenericCommand<tag::load> { std::string name; };
template <> struct GenericCommand<tag::set> {
std::string property;
boost::variant<NoValue, std::string, bool> value; // optional
};
using BootCmd = GenericCommand<tag::boot>;
using HelpCmd = GenericCommand<tag::help>;
using ExitCmd = GenericCommand<tag::exit>;
using ShowCmd = GenericCommand<tag::show>;
using LoadCmd = GenericCommand<tag::load>;
using SetCmd = GenericCommand<tag::set>;
using Command = boost::variant<BootCmd, HelpCmd, ExitCmd, ShowCmd, LoadCmd, SetCmd>;
using Commands = std::list<Command>;
template <typename Tag>
static inline std::ostream& operator<<(std::ostream& os, Ast::GenericCommand<Tag> const& command) {
return os << Tag{} << " " << boost::fusion::as_vector(command);
}
}
BOOST_FUSION_ADAPT_TPL_STRUCT((Tag), (Ast::GenericCommand) (Tag), )
BOOST_FUSION_ADAPT_STRUCT(Ast::LoadCmd, name)
BOOST_FUSION_ADAPT_STRUCT(Ast::SetCmd, property, value)
template <typename Iterator>
struct command_grammar : qi::grammar<Iterator, Ast::Commands()> {
command_grammar() : command_grammar::base_type(start) {
using namespace qi;
start = skip(blank) [lazy_command % eol];
// nabialek trick
lazy_command = no_case [ commands [ _a = _1 ] > lazy(*_a) [ _val = _1 ] ];
on_off.add("on", true)("off", false);
commands.add
("-help", &help_command) ("-hel", &help_command) ("-he", &help_command) ("-h", &help_command)
("/help", &help_command) ("/hel", &help_command) ("/he", &help_command) ("/h", &help_command)
("help", &help_command) ("hel", &help_command) ("he", &help_command) ("h", &help_command)
("boot", &boot_command) ("boo", &boot_command) ("bo", &boot_command) ("b", &boot_command)
("exit", &exit_command) ("exi", &exit_command) ("ex", &exit_command) ("e", &exit_command)
("quit", &exit_command) ("qui", &exit_command) ("qu", &exit_command) ("q", &exit_command)
("load", &load_command) ("loa", &load_command) ("lo", &load_command) ("l", &load_command)
("set", &set_command) ("se", &set_command)
("show", &show_command) ("sho", &show_command) ("sh", &show_command);
quoted_string = '"' >> +~char_('"') >> '"';
// nullary commands
boot_command_ = eps;
exit_command_ = eps;
help_command_ = eps;
show_command_ = eps;
// non-nullary commands
load_command_ = quoted_string;
drive_ = char_("A-Z") >> ':';
set_command_ = no_case[lit("drive")|"driv"|"dri"|"dr"] >> attr("DRIVE") >> drive_
| no_case[ (lit("debug")|"debu"|"deb"|"de") >> attr("DEBUG") >> on_off ]
| no_case[ (lit("trace")|"trac"|"tra"|"tr"|"t") >> attr("TRACE") >> on_off ]
;
BOOST_SPIRIT_DEBUG_NODES(
(start)(lazy_command)
(boot_command) (exit_command) (help_command) (show_command) (set_command) (load_command)
(boot_command_)(exit_command_)(help_command_)(show_command_)(set_command_)(load_command_)
(quoted_string)(drive_)
)
on_error<fail>(start, error_handler_(_4, _3, _2));
on_error<fail>(lazy_command, error_handler_(_4, _3, _2));
boot_command = boot_command_;
exit_command = exit_command_;
help_command = help_command_;
load_command = load_command_;
exit_command = exit_command_;
set_command = set_command_;
show_command = show_command_;
}
private:
struct error_handler_t {
template <typename...> struct result { typedef void type; };
void operator()(qi::info const &What, Iterator Err_pos, Iterator Last) const {
std::cout << "Error! Expecting " << What << " here: \"" << std::string(Err_pos, Last) << "\"" << std::endl;
}
};
boost::phoenix::function<error_handler_t> const error_handler_ = error_handler_t {};
qi::rule<Iterator, Ast::Commands()> start;
using Skipper = qi::blank_type;
using CommandRule = qi::rule<Iterator, Ast::Command(), Skipper>;
qi::symbols<char, bool> on_off;
qi::symbols<char, CommandRule const*> commands;
qi::rule<Iterator, std::string()> drive_property, quoted_string, drive_;
qi::rule<Iterator, Ast::Command(), Skipper, qi::locals<CommandRule const*> > lazy_command;
CommandRule boot_command, exit_command, help_command, load_command, set_command, show_command;
qi::rule<Iterator, Ast::BootCmd(), Skipper> boot_command_;
qi::rule<Iterator, Ast::ExitCmd(), Skipper> exit_command_;
qi::rule<Iterator, Ast::HelpCmd(), Skipper> help_command_;
qi::rule<Iterator, Ast::LoadCmd(), Skipper> load_command_;
qi::rule<Iterator, Ast::SetCmd(), Skipper> set_command_;
qi::rule<Iterator, Ast::ShowCmd(), Skipper> show_command_;
};
int main() {
typedef std::string::const_iterator It;
command_grammar<It> const commands;
for (std::string const input : {
"help",
"set drive C:",
"SET DRIVE C:",
"loAD \"XYZ\"",
"load \"anything \nat all\"",
// multiline
"load \"ABC\"\nhelp\n-he\n/H\nsh\nse t off\nse debug ON\nb\nq"
})
{
std::cout << "----- '" << input << "' -----\n";
It f = input.begin(), l = input.end();
Ast::Commands parsed;
bool result = parse(f, l, commands, parsed);
if (result) {
for (auto& cmd : parsed) {
std::cout << "Parsed " << cmd << "\n";
}
} else {
std::cout << "Parse failed\n";
}
if (f != l) {
std::cout << "Remaining unparsed '" << std::string(f, l) << "'\n";
}
}
}
POST-脚本
Q. How do I annotate the parser to create an AST using utree?
- 见上文
Q. How do I walk the utree after it is built, to discover what was parsed?
Q. I want to add a comment character, "!". So, how can I ignore everything after that - except when it occurs in a quoted string?
只需让 Skipper
类型的规则解析例如:
qi::rule<Iterator> my_skipper;
my_skipper = blank | '!' >> *(char_ - eol) >> (eol|eoi);
然后用它代替 skip(blank)
像 skip(my_skipper)
Q. Why doesn't my error handler get called when I give it invalid input?
- 因为你没有标出期望值(
operator>
而不是operator>>
)。如果不这样做,则匹配子表达式失败只会回溯。
Q. How can I make the command tokens case insensitive, but not change the contents of a quoted string?
- 见上文
我正在为一种命令语言构建一个解析器,它是我从各种示例中拼凑而成的。我已经阅读了 Boost Spirit Qi 和 Lex 文档,我认为我了解基础知识,但根据我所阅读的内容,我应该避免属性并使用 utree。我在 utree 上找到的文档基本上很糟糕。鉴于下面的代码,我有以下问题:
- 如何注释解析器以使用 utree 创建 AST?
- 如何在构建后遍历 utree 以发现解析的内容?例如对于仅令牌命令,例如 SET DEBUG ON,以及带有值的命令,例如 LOAD "file.ext" 或 SET DRIVE C:
- 我想添加一个注释字符,“!”。那么,我怎样才能忽略之后的所有内容 - 除非它出现在带引号的字符串中?
- 为什么我的错误处理程序在我输入无效时没有被调用?
如何使命令标记不区分大小写,但不更改带引号的字符串的内容?
#include <Windows.h> #include <conio.h> #include <string> #include <vector> #include <iostream> #define BOOST_SPIRIT_DEBUG #include <boost\spirit\include\qi.hpp> #include <boost\spirit\include\phoenix.hpp> #include <boost\spirit\include\lex.hpp> #include <boost\spirit\include\lex_lexertl.hpp> using namespace std; using namespace boost::spirit; using boost::spirit::utree; // // Tokens used by the command grammar // template <typename Lexer> struct command_tokens : lex::lexer <Lexer> { command_tokens () : // // Verbs, with abbreviation (just enough characters to make each unique) // boot ("B(O(O(T)?)?)?"), exit ("E(X(I(T)?)?)?"), help ("H(E(L(P)?)?)?"), dash_help ("-H(E(L(P)?)?)?"), slash_help ("\/H(E(L(P)?)?)?"), load ("L(O(A(D)?)?)?"), quit ("Q(U(I(T)?)?)?"), set ("SE(T)?"), show ("SH(O(W)?)?"), // // Nouns, with abbreviation (the minimum number of characters is usually 3, but may be more to ensure uniqueness) // debug ("DEB(U(G)?)?"), drive ("DRI(V(E)?)?"), trace ("TRA(C(E)?)?"), // // Qualifiers // on ("ON"), off ("OFF"), // // Tokens to pass back to the grammar // quoted_string ("...") { using namespace boost::spirit::lex; // // Associate the tokens with the lexer // this->self = boot | exit | help | dash_help | slash_help | load | quit | set | show | debug | drive | trace | off | on | quoted_string ; // // Define whitespace to ignore: space, tab, newline // this->self ("WS") = lex::token_def <> ("[ \t\n]+") ; } lex::token_def <> boot; lex::token_def <> dash_help; lex::token_def <> debug; lex::token_def <string> drive; lex::token_def <> exit; lex::token_def <> help; lex::token_def <> load; lex::token_def <> off; lex::token_def <> on; lex::token_def <> quit; lex::token_def <string> quoted_string; lex::token_def <> set; lex::token_def <> show; lex::token_def <> slash_help; lex::token_def <> trace; }; // // Display parse error // struct error_handler_ { template <typename, typename, typename> struct result { typedef void type; }; template <typename Iterator> void operator () ( qi::info const& What, Iterator Err_pos, Iterator Last ) const { cout << "Error! Expecting " << What << " here: \"" << string (Err_pos, Last) << "\"" << endl; } }; boost::phoenix::function <error_handler_> const error_handler = error_handler_ (); // // Grammar describing the valid commands // template <typename Iterator, typename Lexer> struct command_grammar : qi::grammar <Iterator> { template <typename Lexer> command_grammar (command_tokens <Lexer> const& Tok) : command_grammar::base_type (start) { using qi::on_error; using qi::fail; using qi::char_; start = +commands; commands = ( boot_command | exit_command | help_command | load_command | set_command | show_command ); boot_command = Tok.boot; exit_command = Tok.exit | Tok.quit; help_command = Tok.help | Tok.dash_help | Tok.slash_help; load_command = Tok.load >> Tok.quoted_string; set_command = Tok.set; show_command = Tok.show; set_property = debug_property | drive_property | trace_property; debug_property = Tok.debug >> on_off; drive_property = Tok.drive >> char_ ("A-Z") >> char_ (":"); trace_property = Tok.trace >> on_off; on_off = Tok.on | Tok.off; BOOST_SPIRIT_DEBUG_NODE (start); BOOST_SPIRIT_DEBUG_NODE (commands); BOOST_SPIRIT_DEBUG_NODE (boot_command); BOOST_SPIRIT_DEBUG_NODE (exit_command); BOOST_SPIRIT_DEBUG_NODE (help_command); BOOST_SPIRIT_DEBUG_NODE (load_command); BOOST_SPIRIT_DEBUG_NODE (quit_command); BOOST_SPIRIT_DEBUG_NODE (set_command); BOOST_SPIRIT_DEBUG_NODE (show_command); BOOST_SPIRIT_DEBUG_NODE (set_property); BOOST_SPIRIT_DEBUG_NODE (debug_property); BOOST_SPIRIT_DEBUG_NODE (drive_property); BOOST_SPIRIT_DEBUG_NODE (trace_property); BOOST_SPIRIT_DEBUG_NODE (target_property); on_error <fail> (start, error_handler (_4, _3, _2)); } qi::rule <Iterator> start; qi::rule <Iterator> commands; qi::rule <Iterator> boot_command; qi::rule <Iterator> exit_command; qi::rule <Iterator> help_command; qi::rule <Iterator> load_command; qi::rule <Iterator> quit_command; qi::rule <Iterator> set_command; qi::rule <Iterator> show_command; qi::rule <Iterator> set_property; qi::rule <Iterator> debug_property; qi::rule <Iterator, string ()> drive_property; qi::rule <Iterator> target_property; qi::rule <Iterator> trace_property; qi::rule <Iterator> on_off; }; int main ( int Argc, PCHAR Argv ) { typedef std::string::iterator base_iterator_type; typedef lex::lexertl::token <base_iterator_type> token_type; typedef lex::lexertl::lexer <token_type> lexer_type; typedef command_tokens <lexer_type> command_tokens; typedef command_tokens::iterator_type iterator_type; typedef command_grammar <iterator_type, command_tokens::lexer_def> command_grammar; command_tokens tokens; command_grammar commands (tokens); string input = "SET DRIVE C:"; string::iterator it = input.begin (); iterator_type iter = tokens.begin (it, input.end ()); iterator_type end = tokens.end (); string ws ("WS"); bool result = lex::tokenize_and_phrase_parse (it, input.end (), tokens, commands, qi::in_state (ws) [tokens.self]); if (result) { cout << "Parse succeeded" << endl; } else { string rest (it, input.end ()); cout << "Parse failed" << endl; cout << "Stopped at " << rest << endl; } return 0; } // End of main
我将避开您的大部分代码,原因很简单,经验告诉我 Lex
和 utree
通常不是您想要使用的。
你想要的是定义一个 AST 来表示你的命令语言,然后想出一个语法来构建它。
AST
namespace Ast {
struct NoValue {
bool operator==(NoValue const &) const { return true; }
};
template <typename Tag> struct GenericCommand {};
namespace tag {
struct boot;
struct help;
struct load;
struct exit;
struct set;
struct show;
};
template <> struct GenericCommand<tag::load> { std::string name; };
template <> struct GenericCommand<tag::set> {
std::string property;
boost::variant<NoValue, std::string, bool> value; // optional
};
using BootCmd = GenericCommand<tag::boot>;
using HelpCmd = GenericCommand<tag::help>;
using ExitCmd = GenericCommand<tag::exit>;
using ShowCmd = GenericCommand<tag::show>;
using LoadCmd = GenericCommand<tag::load>;
using SetCmd = GenericCommand<tag::set>;
using Command = boost::variant<BootCmd, HelpCmd, ExitCmd, ShowCmd, LoadCmd, SetCmd>;
using Commands = std::list<Command>;
}
完整代码仅添加了调试输出助手。这是完整的融合改编:
BOOST_FUSION_ADAPT_TPL_STRUCT((Tag), (Ast::GenericCommand) (Tag), )
BOOST_FUSION_ADAPT_STRUCT(Ast::LoadCmd, name)
BOOST_FUSION_ADAPT_STRUCT(Ast::SetCmd, property, value)
语法
这里我做一些选择:
让我们把事情变白-space 并且不区分大小写,允许行分隔的命令:(另请参阅 Boost spirit skipper issues)
start = skip(blank) [lazy_command % eol];
让我们使用Nabialek Trick将命令与前缀关联起来。我使用了一段非常简单的代码来生成唯一的前缀:
std::set<std::string> const verbs { "boot", "exit", "help", "-help", "/help", "load", "quit", "set", "show", }; for (auto const full : verbs) for (auto partial=full; partial.length(); partial.resize(partial.size()-1)) { auto n = std::distance(verbs.lower_bound(partial), verbs.upper_bound(full)); if (n < 2) std::cout << "(\"" << partial << "\", &" << full << "_command)\n"; }
您可以对属性执行相同的操作,但我认为当前设置更简单:
template <typename Iterator>
struct command_grammar : qi::grammar<Iterator, Ast::Commands()> {
command_grammar() : command_grammar::base_type(start) {
using namespace qi;
start = skip(blank) [lazy_command % eol];
// nabialek trick
lazy_command = no_case [ commands [ _a = _1 ] > lazy(*_a) [ _val = _1 ] ];
on_off.add("on", true)("off", false);
commands.add
("-help", &help_command) ("-hel", &help_command) ("-he", &help_command) ("-h", &help_command)
("/help", &help_command) ("/hel", &help_command) ("/he", &help_command) ("/h", &help_command)
("help", &help_command) ("hel", &help_command) ("he", &help_command) ("h", &help_command)
("boot", &boot_command) ("boo", &boot_command) ("bo", &boot_command) ("b", &boot_command)
("exit", &exit_command) ("exi", &exit_command) ("ex", &exit_command) ("e", &exit_command)
("quit", &exit_command) ("qui", &exit_command) ("qu", &exit_command) ("q", &exit_command)
("load", &load_command) ("loa", &load_command) ("lo", &load_command) ("l", &load_command)
("set", &set_command) ("se", &set_command)
("show", &show_command) ("sho", &show_command) ("sh", &show_command);
quoted_string = '"' >> +~char_('"') >> '"';
// nullary commands
boot_command_ = eps;
exit_command_ = eps;
help_command_ = eps;
show_command_ = eps;
// non-nullary commands
load_command_ = quoted_string;
drive_ = char_("A-Z") >> ':';
set_command_ = no_case[lit("drive")|"driv"|"dri"|"dr"] >> attr("DRIVE") >> drive_
| no_case[ (lit("debug")|"debu"|"deb"|"de") >> attr("DEBUG") >> on_off ]
| no_case[ (lit("trace")|"trac"|"tra"|"tr"|"t") >> attr("TRACE") >> on_off ]
;
BOOST_SPIRIT_DEBUG_NODES(
(start)(lazy_command)
(boot_command) (exit_command) (help_command) (show_command) (set_command) (load_command)
(boot_command_)(exit_command_)(help_command_)(show_command_)(set_command_)(load_command_)
(quoted_string)(drive_)
)
on_error<fail>(start, error_handler_(_4, _3, _2));
on_error<fail>(lazy_command, error_handler_(_4, _3, _2));
boot_command = boot_command_;
exit_command = exit_command_;
help_command = help_command_;
load_command = load_command_;
exit_command = exit_command_;
set_command = set_command_;
show_command = show_command_;
}
private:
struct error_handler_t {
template <typename...> struct result { typedef void type; };
void operator()(qi::info const &What, Iterator Err_pos, Iterator Last) const {
std::cout << "Error! Expecting " << What << " here: \"" << std::string(Err_pos, Last) << "\"" << std::endl;
}
};
boost::phoenix::function<error_handler_t> const error_handler_ = error_handler_t {};
qi::rule<Iterator, Ast::Commands()> start;
using Skipper = qi::blank_type;
using CommandRule = qi::rule<Iterator, Ast::Command(), Skipper>;
qi::symbols<char, bool> on_off;
qi::symbols<char, CommandRule const*> commands;
qi::rule<Iterator, std::string()> drive_property, quoted_string, drive_;
qi::rule<Iterator, Ast::Command(), Skipper, qi::locals<CommandRule const*> > lazy_command;
CommandRule boot_command, exit_command, help_command, load_command, set_command, show_command;
qi::rule<Iterator, Ast::BootCmd(), Skipper> boot_command_;
qi::rule<Iterator, Ast::ExitCmd(), Skipper> exit_command_;
qi::rule<Iterator, Ast::HelpCmd(), Skipper> help_command_;
qi::rule<Iterator, Ast::LoadCmd(), Skipper> load_command_;
qi::rule<Iterator, Ast::SetCmd(), Skipper> set_command_;
qi::rule<Iterator, Ast::ShowCmd(), Skipper> show_command_;
};
测试用例
int main() {
typedef std::string::const_iterator It;
command_grammar<It> const commands;
for (std::string const input : {
"help",
"set drive C:",
"SET DRIVE C:",
"loAD \"XYZ\"",
"load \"anything \nat all\"",
// multiline
"load \"ABC\"\nhelp\n-he\n/H\nsh\nse t off\nse debug ON\nb\nq"
})
{
std::cout << "----- '" << input << "' -----\n";
It f = input.begin(), l = input.end();
Ast::Commands parsed;
bool result = parse(f, l, commands, parsed);
if (result) {
for (auto& cmd : parsed) {
std::cout << "Parsed " << cmd << "\n";
}
} else {
std::cout << "Parse failed\n";
}
if (f != l) {
std::cout << "Remaining unparsed '" << std::string(f, l) << "'\n";
}
}
}
打印:
----- 'help' -----
Parsed HELP ()
----- 'set drive C:' -----
Parsed SET (DRIVE C)
----- 'SET DRIVE C:' -----
Parsed SET (DRIVE C)
----- 'loAD "XYZ"' -----
Parsed LOAD (XYZ)
----- 'load "anything
at all"' -----
Parsed LOAD (anything
at all)
----- 'load "ABC"
help
-he
/H
sh
se t off
se debug ON
b
q' -----
Parsed LOAD (ABC)
Parsed HELP ()
Parsed HELP ()
Parsed HELP ()
Parsed SHOW ()
Parsed SET (TRACE 0)
Parsed SET (DEBUG 1)
Parsed BOOT ()
Parsed EXIT ()
完整列表
//#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/include/io.hpp>
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
namespace qi = boost::spirit::qi;
namespace Ast {
struct NoValue {
bool operator==(NoValue const &) const { return true; }
friend std::ostream& operator<<(std::ostream& os, NoValue) { return os; }
};
template <typename Tag> struct GenericCommand {};
namespace tag {
struct boot {};
struct help {};
struct load {};
struct exit {};
struct set {};
struct show {};
static std::ostream& operator<<(std::ostream& os, boot) { return os << "BOOT"; }
static std::ostream& operator<<(std::ostream& os, help) { return os << "HELP"; }
static std::ostream& operator<<(std::ostream& os, load) { return os << "LOAD"; }
static std::ostream& operator<<(std::ostream& os, exit) { return os << "EXIT"; }
static std::ostream& operator<<(std::ostream& os, set ) { return os << "SET"; }
static std::ostream& operator<<(std::ostream& os, show) { return os << "SHOW"; }
};
template <> struct GenericCommand<tag::load> { std::string name; };
template <> struct GenericCommand<tag::set> {
std::string property;
boost::variant<NoValue, std::string, bool> value; // optional
};
using BootCmd = GenericCommand<tag::boot>;
using HelpCmd = GenericCommand<tag::help>;
using ExitCmd = GenericCommand<tag::exit>;
using ShowCmd = GenericCommand<tag::show>;
using LoadCmd = GenericCommand<tag::load>;
using SetCmd = GenericCommand<tag::set>;
using Command = boost::variant<BootCmd, HelpCmd, ExitCmd, ShowCmd, LoadCmd, SetCmd>;
using Commands = std::list<Command>;
template <typename Tag>
static inline std::ostream& operator<<(std::ostream& os, Ast::GenericCommand<Tag> const& command) {
return os << Tag{} << " " << boost::fusion::as_vector(command);
}
}
BOOST_FUSION_ADAPT_TPL_STRUCT((Tag), (Ast::GenericCommand) (Tag), )
BOOST_FUSION_ADAPT_STRUCT(Ast::LoadCmd, name)
BOOST_FUSION_ADAPT_STRUCT(Ast::SetCmd, property, value)
template <typename Iterator>
struct command_grammar : qi::grammar<Iterator, Ast::Commands()> {
command_grammar() : command_grammar::base_type(start) {
using namespace qi;
start = skip(blank) [lazy_command % eol];
// nabialek trick
lazy_command = no_case [ commands [ _a = _1 ] > lazy(*_a) [ _val = _1 ] ];
on_off.add("on", true)("off", false);
commands.add
("-help", &help_command) ("-hel", &help_command) ("-he", &help_command) ("-h", &help_command)
("/help", &help_command) ("/hel", &help_command) ("/he", &help_command) ("/h", &help_command)
("help", &help_command) ("hel", &help_command) ("he", &help_command) ("h", &help_command)
("boot", &boot_command) ("boo", &boot_command) ("bo", &boot_command) ("b", &boot_command)
("exit", &exit_command) ("exi", &exit_command) ("ex", &exit_command) ("e", &exit_command)
("quit", &exit_command) ("qui", &exit_command) ("qu", &exit_command) ("q", &exit_command)
("load", &load_command) ("loa", &load_command) ("lo", &load_command) ("l", &load_command)
("set", &set_command) ("se", &set_command)
("show", &show_command) ("sho", &show_command) ("sh", &show_command);
quoted_string = '"' >> +~char_('"') >> '"';
// nullary commands
boot_command_ = eps;
exit_command_ = eps;
help_command_ = eps;
show_command_ = eps;
// non-nullary commands
load_command_ = quoted_string;
drive_ = char_("A-Z") >> ':';
set_command_ = no_case[lit("drive")|"driv"|"dri"|"dr"] >> attr("DRIVE") >> drive_
| no_case[ (lit("debug")|"debu"|"deb"|"de") >> attr("DEBUG") >> on_off ]
| no_case[ (lit("trace")|"trac"|"tra"|"tr"|"t") >> attr("TRACE") >> on_off ]
;
BOOST_SPIRIT_DEBUG_NODES(
(start)(lazy_command)
(boot_command) (exit_command) (help_command) (show_command) (set_command) (load_command)
(boot_command_)(exit_command_)(help_command_)(show_command_)(set_command_)(load_command_)
(quoted_string)(drive_)
)
on_error<fail>(start, error_handler_(_4, _3, _2));
on_error<fail>(lazy_command, error_handler_(_4, _3, _2));
boot_command = boot_command_;
exit_command = exit_command_;
help_command = help_command_;
load_command = load_command_;
exit_command = exit_command_;
set_command = set_command_;
show_command = show_command_;
}
private:
struct error_handler_t {
template <typename...> struct result { typedef void type; };
void operator()(qi::info const &What, Iterator Err_pos, Iterator Last) const {
std::cout << "Error! Expecting " << What << " here: \"" << std::string(Err_pos, Last) << "\"" << std::endl;
}
};
boost::phoenix::function<error_handler_t> const error_handler_ = error_handler_t {};
qi::rule<Iterator, Ast::Commands()> start;
using Skipper = qi::blank_type;
using CommandRule = qi::rule<Iterator, Ast::Command(), Skipper>;
qi::symbols<char, bool> on_off;
qi::symbols<char, CommandRule const*> commands;
qi::rule<Iterator, std::string()> drive_property, quoted_string, drive_;
qi::rule<Iterator, Ast::Command(), Skipper, qi::locals<CommandRule const*> > lazy_command;
CommandRule boot_command, exit_command, help_command, load_command, set_command, show_command;
qi::rule<Iterator, Ast::BootCmd(), Skipper> boot_command_;
qi::rule<Iterator, Ast::ExitCmd(), Skipper> exit_command_;
qi::rule<Iterator, Ast::HelpCmd(), Skipper> help_command_;
qi::rule<Iterator, Ast::LoadCmd(), Skipper> load_command_;
qi::rule<Iterator, Ast::SetCmd(), Skipper> set_command_;
qi::rule<Iterator, Ast::ShowCmd(), Skipper> show_command_;
};
int main() {
typedef std::string::const_iterator It;
command_grammar<It> const commands;
for (std::string const input : {
"help",
"set drive C:",
"SET DRIVE C:",
"loAD \"XYZ\"",
"load \"anything \nat all\"",
// multiline
"load \"ABC\"\nhelp\n-he\n/H\nsh\nse t off\nse debug ON\nb\nq"
})
{
std::cout << "----- '" << input << "' -----\n";
It f = input.begin(), l = input.end();
Ast::Commands parsed;
bool result = parse(f, l, commands, parsed);
if (result) {
for (auto& cmd : parsed) {
std::cout << "Parsed " << cmd << "\n";
}
} else {
std::cout << "Parse failed\n";
}
if (f != l) {
std::cout << "Remaining unparsed '" << std::string(f, l) << "'\n";
}
}
}
POST-脚本
Q. How do I annotate the parser to create an AST using utree?
- 见上文
Q. How do I walk the utree after it is built, to discover what was parsed?
Q. I want to add a comment character, "!". So, how can I ignore everything after that - except when it occurs in a quoted string?
只需让
Skipper
类型的规则解析例如:qi::rule<Iterator> my_skipper; my_skipper = blank | '!' >> *(char_ - eol) >> (eol|eoi);
然后用它代替
skip(blank)
像skip(my_skipper)
Q. Why doesn't my error handler get called when I give it invalid input?
- 因为你没有标出期望值(
operator>
而不是operator>>
)。如果不这样做,则匹配子表达式失败只会回溯。
Q. How can I make the command tokens case insensitive, but not change the contents of a quoted string?
- 见上文