使用 Boost Spirit 解析命令语言

Parsing a command language using Boost Spirit

我正在为一种命令语言构建一个解析器,它是我从各种示例中拼凑而成的。我已经阅读了 Boost Spirit Qi 和 Lex 文档,我认为我了解基础知识,但根据我所阅读的内容,我应该避免属性并使用 utree。我在 utree 上找到的文档基本上很糟糕。鉴于下面的代码,我有以下问题:

  1. 如何注释解析器以使用 utree 创建 AST?
  2. 如何在构建后遍历 utree 以发现解析的内容?例如,对于仅由标记(token)组成的命令(如 SET DEBUG ON),以及带有值的命令(如 LOAD "file.ext" 或 SET DRIVE C:)。
  3. 我想添加一个注释字符,“!”。那么,我怎样才能忽略之后的所有内容 - 除非它出现在带引号的字符串中?
  4. 为什么我的错误处理程序在我输入无效时没有被调用?
  5. 如何使命令标记不区分大小写,但不更改带引号的字符串的内容?

    #include <Windows.h>
    #include <conio.h>
    #include <string>
    #include <vector>
    #include <iostream>
    
    #define BOOST_SPIRIT_DEBUG
    
    #include <boost\spirit\include\qi.hpp>
    #include <boost\spirit\include\phoenix.hpp>
    #include <boost\spirit\include\lex.hpp>
    #include <boost\spirit\include\lex_lexertl.hpp>
    
    using namespace std;
    using namespace boost::spirit;
    using boost::spirit::utree;
    
    //
    // Tokens used by the command grammar
    //
    
    // Lexer definition for the command language. Each member is a lex::token_def
    // whose regular expression accepts a keyword together with its shorter
    // abbreviations (e.g. "B(O(O(T)?)?)?" accepts B, BO, BOO and BOOT).
    // NOTE(review): the mem-initializer order below (verbs, nouns, qualifiers)
    // differs from the alphabetical member declaration order at the bottom of
    // the struct; members are constructed in declaration order.
    template <typename Lexer>
    struct command_tokens : lex::lexer <Lexer>
        {
        command_tokens () :
    
            //
            // Verbs, with abbreviation (just enough characters to make each unique)
            //
    
            boot        ("B(O(O(T)?)?)?"),
            exit        ("E(X(I(T)?)?)?"),
            help        ("H(E(L(P)?)?)?"),
            dash_help   ("-H(E(L(P)?)?)?"),
            // NOTE(review): "\/" is not a standard C++ escape sequence - confirm
            // how the compiler in use treats it.
            slash_help  ("\/H(E(L(P)?)?)?"),
            load        ("L(O(A(D)?)?)?"),
            quit        ("Q(U(I(T)?)?)?"),
            set         ("SE(T)?"),
            show        ("SH(O(W)?)?"),
    
            //
            // Nouns, with abbreviation (the minimum number of characters is usually 3, but may be more to ensure uniqueness)
            //
    
            debug       ("DEB(U(G)?)?"),
            drive       ("DRI(V(E)?)?"),
            trace       ("TRA(C(E)?)?"),
    
            //
            // Qualifiers
            //
    
            on          ("ON"),
            off         ("OFF"),
    
            //
            // Tokens to pass back to the grammar
            //
    
            // NOTE(review): as a regular expression "..." matches any three
            // characters; this looks like a placeholder rather than a real
            // quoted-string pattern - confirm.
            quoted_string   ("...")
    
            {
            using namespace boost::spirit::lex;
    
            //
            // Associate the tokens with the lexer
            //
    
            this->self 
                = boot
                | exit
                | help
                | dash_help
                | slash_help
                | load
                | quit
                | set
                | show
                | debug
                | drive
                | trace
                | off
                | on
                | quoted_string
                ;
    
            //
            // Define whitespace to ignore: space, tab, newline
            //
    
            // The whitespace token is registered in a separate lexer state named "WS".
            this->self ("WS")
                = lex::token_def <> ("[ \t\n]+")
                ;
            }
    
        // Token definitions. Only drive and quoted_string declare a string
        // attribute; the others use the default token_def <> form.
        lex::token_def <>   boot;
        lex::token_def <>   dash_help;
        lex::token_def <>   debug;
        lex::token_def <string> drive;
        lex::token_def <>   exit;
        lex::token_def <>   help;
        lex::token_def <>   load;
        lex::token_def <>   off;
        lex::token_def <>   on;
        lex::token_def <>   quit;
        lex::token_def <string> quoted_string;
        lex::token_def <>   set;
        lex::token_def <>   show;
        lex::token_def <>   slash_help;
        lex::token_def <>   trace;
        };
    
    //
    // Display parse error
    //
    
    // Function object that prints a parse-error message to cout. It is wrapped
    // in boost::phoenix::function below so it can be called lazily from
    // qi::on_error.
    struct error_handler_
        {
        // Result-type protocol for the three-argument call: always void.
        template <typename, typename, typename>
        struct result
            {
            typedef void type;
            };
    
        // Prints what was expected (What) followed by the input text from
        // Err_pos up to Last.
        template <typename Iterator>
        void operator ()
            (
            qi::info const& What,
            Iterator        Err_pos,
            Iterator        Last
            ) const
    
            {
            cout << "Error! Expecting "
                << What
                << " here: \""
                << string (Err_pos, Last)
                << "\""
                << endl;
            }
        };
    
    // Lazy (Phoenix) wrapper used by the grammar's on_error call.
    boost::phoenix::function <error_handler_> const error_handler = error_handler_ ();
    
    //
    // Grammar describing the valid commands
    //
    
    // Qi grammar over the token stream produced by command_tokens. No rule
    // here declares an attribute except drive_property (string), so a
    // successful parse yields only a match/no-match result.
    template <typename Iterator, typename Lexer>
    struct command_grammar : qi::grammar <Iterator>
        {
        // NOTE(review): this constructor template re-declares the name Lexer,
        // which is already a template parameter of the enclosing class -
        // confirm that the compiler in use accepts this.
        template <typename Lexer>
        command_grammar (command_tokens <Lexer> const& Tok) :
            command_grammar::base_type (start)
            {
            using qi::on_error;
            using qi::fail;
            using qi::char_;
    
            // One or more commands.
            start
                = +commands;
    
            commands
                = (
                  boot_command
                | exit_command
                | help_command
                | load_command
                | set_command
                | show_command
                );
    
            boot_command
                = Tok.boot;
    
            // EXIT and QUIT are both handled by exit_command.
            exit_command
                = Tok.exit
                | Tok.quit;
    
            help_command
                = Tok.help
                | Tok.dash_help
                | Tok.slash_help;
    
            load_command
                = Tok.load >> Tok.quoted_string;
    
            // NOTE(review): set_command matches only the SET token; set_property
            // (defined below) is not referenced by any rule in this grammar.
            set_command
                = Tok.set;
    
            show_command
                = Tok.show;
    
            set_property
                = debug_property
                | drive_property
                | trace_property;
    
            debug_property
                = Tok.debug >> on_off;
    
            // NOTE(review): char_ is a character-level parser used here inside
            // a token-level grammar - confirm this matches as intended.
           drive_property
                = Tok.drive >> char_ ("A-Z") >> char_ (":");
    
            trace_property
                = Tok.trace >> on_off;
    
            on_off
                = Tok.on
                | Tok.off;
    
            // NOTE(review): quit_command and target_property are registered for
            // debugging but are never assigned a definition in this constructor.
            BOOST_SPIRIT_DEBUG_NODE (start);
            BOOST_SPIRIT_DEBUG_NODE (commands);
            BOOST_SPIRIT_DEBUG_NODE (boot_command);
            BOOST_SPIRIT_DEBUG_NODE (exit_command);
            BOOST_SPIRIT_DEBUG_NODE (help_command);
            BOOST_SPIRIT_DEBUG_NODE (load_command);
            BOOST_SPIRIT_DEBUG_NODE (quit_command);
            BOOST_SPIRIT_DEBUG_NODE (set_command);
            BOOST_SPIRIT_DEBUG_NODE (show_command);
            BOOST_SPIRIT_DEBUG_NODE (set_property);
            BOOST_SPIRIT_DEBUG_NODE (debug_property);
            BOOST_SPIRIT_DEBUG_NODE (drive_property);
            BOOST_SPIRIT_DEBUG_NODE (trace_property);
            BOOST_SPIRIT_DEBUG_NODE (target_property);
            // Placeholders _4, _3, _2 are passed as What, Err_pos and Last.
            // NOTE(review): every sequence above uses operator>>; there is no
            // expectation point (operator>), so it is unclear when this handler
            // could fire - confirm against the Qi error-handling documentation.
            on_error <fail> (start, error_handler (_4, _3, _2));
            }
    
        qi::rule <Iterator> start;
        qi::rule <Iterator> commands;
        qi::rule <Iterator> boot_command;
        qi::rule <Iterator> exit_command;
        qi::rule <Iterator> help_command;
        qi::rule <Iterator> load_command;
        qi::rule <Iterator> quit_command;
        qi::rule <Iterator> set_command;
        qi::rule <Iterator> show_command;
        qi::rule <Iterator> set_property;
        qi::rule <Iterator> debug_property;
        qi::rule <Iterator, string ()>  drive_property;
        qi::rule <Iterator> target_property;
        qi::rule <Iterator> trace_property;
        qi::rule <Iterator> on_off;
        };
    
    // Test driver: tokenizes and parses the fixed input "SET DRIVE C:" and
    // reports success, or the unconsumed remainder on failure.
    // NOTE(review): Argv is declared as a single PCHAR rather than an array of
    // them - confirm this is an accepted signature for main on the target
    // toolchain. Neither Argc nor Argv is used.
    int
    main
        (
        int     Argc,
        PCHAR   Argv
        )
    {
        typedef std::string::iterator                       base_iterator_type;
        typedef lex::lexertl::token <base_iterator_type>    token_type;
        typedef lex::lexertl::lexer <token_type>            lexer_type;
        typedef command_tokens <lexer_type>                 command_tokens;
        typedef command_tokens::iterator_type               iterator_type;
        typedef command_grammar <iterator_type, command_tokens::lexer_def>  command_grammar;
    
        command_tokens      tokens;
        command_grammar     commands (tokens);
        string              input = "SET DRIVE C:";
        string::iterator    it = input.begin ();
        // NOTE(review): iter and end are initialized here but not used again
        // in this function.
        iterator_type       iter = tokens.begin (it, input.end ());
        iterator_type       end = tokens.end ();
        // Name of the lexer state that holds the whitespace token.
        string              ws ("WS");
    
        // Lex and parse in one call, using the tokens of the "WS" state as the skipper.
        bool                result = lex::tokenize_and_phrase_parse (it, input.end (), tokens, commands, qi::in_state (ws) [tokens.self]);
    
        if (result)
            {
            cout << "Parse succeeded" << endl;
            }
        else
            {
            // Show the input from it to the end of the string.
            string  rest (it, input.end ());
            cout << "Parse failed" << endl;
            cout << "Stopped at " << rest << endl;
            }
    
        return 0;
    }                           // End of main
    

我将避开您的大部分代码,原因很简单:经验告诉我,Lex 和 utree 通常不是您想要使用的。

你想要的是定义一个 AST 来表示你的命令语言,然后想出一个语法来构建它。

AST

// Abstract syntax tree for the command language: one type per command,
// combined into a variant, plus a list of such commands.
namespace Ast {
    // Alternative of the SET value variant that carries no data; all
    // instances compare equal.
    struct NoValue {
        bool operator==(NoValue const &) const { return true; }
    };

    // Primary template: a command with no payload, distinguished only by Tag.
    template <typename Tag> struct GenericCommand {};

    // Tag types, one per command kind (declarations only in this excerpt).
    namespace tag {
        struct boot;
        struct help;
        struct load;
        struct exit;
        struct set;
        struct show;
    }

    // LOAD carries a single string.
    template <> struct GenericCommand<tag::load> { std::string name; };

    // SET carries a property name and a value that is empty, a string or a bool.
    template <> struct GenericCommand<tag::set> {
        std::string property;
        boost::variant<NoValue, std::string, bool> value; // optional
    };

    using BootCmd = GenericCommand<tag::boot>;
    using HelpCmd = GenericCommand<tag::help>;
    using ExitCmd = GenericCommand<tag::exit>;
    using ShowCmd = GenericCommand<tag::show>;
    using LoadCmd = GenericCommand<tag::load>;
    using SetCmd  = GenericCommand<tag::set>;

    // Any single command, and a sequence of them.
    using Command = boost::variant<BootCmd, HelpCmd, ExitCmd, ShowCmd, LoadCmd, SetCmd>;
    using Commands = std::list<Command>;
}

完整代码只是额外添加了用于调试输出的辅助函数。下面是完整的 Fusion 适配(Boost.Fusion adaptation):

// Adapt every Ast::GenericCommand<Tag> as a Fusion sequence; the member list is empty.
BOOST_FUSION_ADAPT_TPL_STRUCT((Tag), (Ast::GenericCommand) (Tag), )
// The two commands that carry data are adapted with their members listed.
BOOST_FUSION_ADAPT_STRUCT(Ast::LoadCmd, name)
BOOST_FUSION_ADAPT_STRUCT(Ast::SetCmd, property, value)

语法

这里我做一些选择:

  • 让语法忽略空白(white-space)且不区分大小写,并允许以换行分隔多条命令(另请参阅 Boost spirit skipper issues):

    start = skip(blank) [lazy_command % eol];
    
  • 让我们使用Nabialek Trick将命令与前缀关联起来。我使用了一段非常简单的代码来生成唯一的前缀:

    // Prints one symbol-table entry for every prefix of a verb that no other
    // verb shares. A prefix is unique only when exactly one verb starts with
    // it; comparing positions in the sorted set is not enough, because it
    // would report "s" as unique for "set" even though "show" shares it.
    std::set<std::string> const verbs { "boot", "exit", "help", "-help", "/help", "load", "quit", "set", "show", };
    for (auto const& full : verbs)   // by reference: no copy of each verb
        for (auto partial=full; partial.length(); partial.resize(partial.size()-1)) {
            // Count the verbs that begin with this prefix (always at least `full` itself).
            std::size_t n = 0;
            for (auto const& verb : verbs)
                if (verb.compare(0, partial.size(), partial) == 0) ++n;
            if (n < 2) std::cout << "(\"" << partial << "\", &" << full << "_command)\n";
        }
    
  • 您可以对属性执行相同的操作,但我认为当前设置更简单:

// Qi grammar that parses end-of-line separated commands into Ast::Commands.
//
// Command verbs are looked up in a symbol table that maps every accepted
// abbreviation to a pointer to the rule parsing the rest of that command
// (the "Nabialek trick"); the selected rule is then invoked through lazy().
template <typename Iterator>
struct command_grammar : qi::grammar<Iterator, Ast::Commands()> {
    command_grammar() : command_grammar::base_type(start) {
        using namespace qi;

        // Commands are separated by eol; blank is the skipper within a line.
        start = skip(blank) [lazy_command % eol];

        // nabialek trick: remember the rule selected by the verb in local _a,
        // then parse the remainder with it. The '>' is an expectation point,
        // so a failure after a recognised verb is reported through on_error.
        lazy_command = no_case [ commands [ _a = _1 ] > lazy(*_a) [ _val = _1 ] ];

        on_off.add("on", true)("off", false);

        // Verb abbreviations. QUIT and its abbreviations map to exit_command.
        commands.add
            ("-help", &help_command) ("-hel", &help_command) ("-he", &help_command) ("-h", &help_command)
            ("/help", &help_command) ("/hel", &help_command) ("/he", &help_command) ("/h", &help_command)
            ("help", &help_command) ("hel", &help_command) ("he", &help_command) ("h", &help_command)
            ("boot", &boot_command) ("boo", &boot_command) ("bo", &boot_command) ("b", &boot_command)
            ("exit", &exit_command) ("exi", &exit_command) ("ex", &exit_command) ("e", &exit_command)
            ("quit", &exit_command) ("qui", &exit_command) ("qu", &exit_command) ("q", &exit_command)
            ("load", &load_command) ("loa", &load_command) ("lo", &load_command) ("l", &load_command)
            ("set", &set_command) ("se", &set_command)
            ("show", &show_command) ("sho", &show_command) ("sh", &show_command);

        // Declared without a skipper, so characters between the quotes are
        // taken verbatim. Requires at least one character inside the quotes.
        quoted_string = '"' >> +~char_('"') >> '"';

        // nullary commands
        boot_command_ = eps;
        exit_command_ = eps;
        help_command_ = eps;
        show_command_ = eps;

        // non-nullary commands
        load_command_ = quoted_string;
        drive_        = char_("A-Z") >> ':';
        // attr(...) injects the canonical property name into the SetCmd.
        set_command_  = no_case[lit("drive")|"driv"|"dri"|"dr"] >> attr("DRIVE") >> drive_
                | no_case[ (lit("debug")|"debu"|"deb"|"de")     >> attr("DEBUG") >> on_off ]
                | no_case[ (lit("trace")|"trac"|"tra"|"tr"|"t") >> attr("TRACE") >> on_off ]
                ;

        BOOST_SPIRIT_DEBUG_NODES(
                (start)(lazy_command)
                (boot_command) (exit_command) (help_command) (show_command) (set_command) (load_command)
                (boot_command_)(exit_command_)(help_command_)(show_command_)(set_command_)(load_command_)
                (quoted_string)(drive_)
            )

        // Placeholders _4, _3, _2 are passed as What, Err_pos and Last.
        on_error<fail>(start, error_handler_(_4, _3, _2));
        on_error<fail>(lazy_command, error_handler_(_4, _3, _2));

        // Each rule stored in the symbol table forwards to its typed
        // counterpart, one assignment per rule.
        boot_command = boot_command_;
        exit_command = exit_command_;
        help_command = help_command_;
        load_command = load_command_;
        set_command  = set_command_;
        show_command = show_command_;
    }

  private:
    // Prints what was expected and the input from the error position to the end.
    struct error_handler_t {
        template <typename...> struct result { typedef void type; };

        void operator()(qi::info const &What, Iterator Err_pos, Iterator Last) const {
            std::cout << "Error! Expecting " << What << " here: \"" << std::string(Err_pos, Last) << "\"" << std::endl;
        }
    };

    boost::phoenix::function<error_handler_t> const error_handler_ = error_handler_t {};

    qi::rule<Iterator, Ast::Commands()> start;

    using Skipper = qi::blank_type;
    using CommandRule  = qi::rule<Iterator, Ast::Command(), Skipper>;

    qi::symbols<char, bool> on_off;
    qi::symbols<char, CommandRule const*> commands;

    // No skipper on these rules.
    qi::rule<Iterator, std::string()> quoted_string, drive_;

    qi::rule<Iterator, Ast::Command(), Skipper, qi::locals<CommandRule const*> > lazy_command;
    CommandRule boot_command, exit_command, help_command, load_command, set_command, show_command;

    // Typed rules, one per concrete command type.
    qi::rule<Iterator, Ast::BootCmd(), Skipper> boot_command_;
    qi::rule<Iterator, Ast::ExitCmd(), Skipper> exit_command_;
    qi::rule<Iterator, Ast::HelpCmd(), Skipper> help_command_;
    qi::rule<Iterator, Ast::LoadCmd(), Skipper> load_command_;
    qi::rule<Iterator, Ast::SetCmd(),  Skipper> set_command_;
    qi::rule<Iterator, Ast::ShowCmd(), Skipper> show_command_;
};

测试用例

Live On Coliru

int main() {
    typedef std::string::const_iterator It;
    command_grammar<It> const commands;

    for (std::string const input : {
            "help",
            "set drive C:",
            "SET DRIVE C:",
            "loAD \"XYZ\"",
            "load \"anything \nat all\"",
            // multiline
            "load \"ABC\"\nhelp\n-he\n/H\nsh\nse t off\nse debug ON\nb\nq"
            })
    {
        std::cout << "----- '" << input << "' -----\n";
        It f = input.begin(), l = input.end();

        Ast::Commands parsed;
        bool result = parse(f, l, commands, parsed);

        if (result) {
            for (auto& cmd : parsed) {
                std::cout << "Parsed " << cmd << "\n";
            }
        } else {
            std::cout << "Parse failed\n";
        }

        if (f != l) {
            std::cout << "Remaining unparsed '" << std::string(f, l) << "'\n";
        }
    }
}

打印:

----- 'help' -----
Parsed HELP ()
----- 'set drive C:' -----
Parsed SET (DRIVE C)
----- 'SET DRIVE C:' -----
Parsed SET (DRIVE C)
----- 'loAD "XYZ"' -----
Parsed LOAD (XYZ)
----- 'load "anything 
at all"' -----
Parsed LOAD (anything 
at all)
----- 'load "ABC"
help
-he
/H
sh
se t off
se debug ON
b
q' -----
Parsed LOAD (ABC)
Parsed HELP ()
Parsed HELP ()
Parsed HELP ()
Parsed SHOW ()
Parsed SET (TRACE 0)
Parsed SET (DEBUG 1)
Parsed BOOT ()
Parsed EXIT ()

完整列表

Live On Coliru

//#define BOOST_SPIRIT_DEBUG
#include <iostream>
#include <list>
#include <string>

#include <boost/fusion/include/io.hpp>
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>

namespace qi = boost::spirit::qi;

// Abstract syntax tree for the command language, with stream output for
// every node type.
namespace Ast {
    // Alternative of the SET value variant that carries no data; all
    // instances compare equal and streaming one writes nothing.
    struct NoValue {
        bool operator==(NoValue const &) const { return true; }
        friend std::ostream& operator<<(std::ostream& os, NoValue) { return os; }
    };

    // Primary template: a command with no payload, distinguished only by Tag.
    template <typename Tag> struct GenericCommand {};

    // Tag types, one per command kind. Each streams as its upper-case verb.
    namespace tag {
        struct boot {};
        struct help {};
        struct load {};
        struct exit {};
        struct set {};
        struct show {};

        static std::ostream& operator<<(std::ostream& os, boot) { return os << "BOOT"; }
        static std::ostream& operator<<(std::ostream& os, help) { return os << "HELP"; }
        static std::ostream& operator<<(std::ostream& os, load) { return os << "LOAD"; }
        static std::ostream& operator<<(std::ostream& os, exit) { return os << "EXIT"; }
        static std::ostream& operator<<(std::ostream& os, set ) { return os << "SET"; }
        static std::ostream& operator<<(std::ostream& os, show) { return os << "SHOW"; }
    }

    // LOAD carries a single string.
    template <> struct GenericCommand<tag::load> { std::string name; };

    // SET carries a property name and a value that is empty, a string or a bool.
    template <> struct GenericCommand<tag::set> {
        std::string property;
        boost::variant<NoValue, std::string, bool> value; // optional
    };

    using BootCmd = GenericCommand<tag::boot>;
    using HelpCmd = GenericCommand<tag::help>;
    using ExitCmd = GenericCommand<tag::exit>;
    using ShowCmd = GenericCommand<tag::show>;
    using LoadCmd = GenericCommand<tag::load>;
    using SetCmd  = GenericCommand<tag::set>;

    // Any single command, and a sequence of them.
    using Command = boost::variant<BootCmd, HelpCmd, ExitCmd, ShowCmd, LoadCmd, SetCmd>;
    using Commands = std::list<Command>;

    // Streams a command as its tag, a space, then its members as a Fusion vector.
    template <typename Tag>
    static inline std::ostream& operator<<(std::ostream& os, Ast::GenericCommand<Tag> const& command) { 
        return os << Tag{} << " " << boost::fusion::as_vector(command);
    }
}

// Adapt every Ast::GenericCommand<Tag> as a Fusion sequence; the member list is empty.
BOOST_FUSION_ADAPT_TPL_STRUCT((Tag), (Ast::GenericCommand) (Tag), )
// The two commands that carry data are adapted with their members listed.
BOOST_FUSION_ADAPT_STRUCT(Ast::LoadCmd, name)
BOOST_FUSION_ADAPT_STRUCT(Ast::SetCmd, property, value)

// Qi grammar that parses end-of-line separated commands into Ast::Commands.
//
// Command verbs are looked up in a symbol table that maps every accepted
// abbreviation to a pointer to the rule parsing the rest of that command
// (the "Nabialek trick"); the selected rule is then invoked through lazy().
template <typename Iterator>
struct command_grammar : qi::grammar<Iterator, Ast::Commands()> {
    command_grammar() : command_grammar::base_type(start) {
        using namespace qi;

        // Commands are separated by eol; blank is the skipper within a line.
        start = skip(blank) [lazy_command % eol];

        // nabialek trick: remember the rule selected by the verb in local _a,
        // then parse the remainder with it. The '>' is an expectation point,
        // so a failure after a recognised verb is reported through on_error.
        lazy_command = no_case [ commands [ _a = _1 ] > lazy(*_a) [ _val = _1 ] ];

        on_off.add("on", true)("off", false);

        // Verb abbreviations. QUIT and its abbreviations map to exit_command.
        commands.add
            ("-help", &help_command) ("-hel", &help_command) ("-he", &help_command) ("-h", &help_command)
            ("/help", &help_command) ("/hel", &help_command) ("/he", &help_command) ("/h", &help_command)
            ("help", &help_command) ("hel", &help_command) ("he", &help_command) ("h", &help_command)
            ("boot", &boot_command) ("boo", &boot_command) ("bo", &boot_command) ("b", &boot_command)
            ("exit", &exit_command) ("exi", &exit_command) ("ex", &exit_command) ("e", &exit_command)
            ("quit", &exit_command) ("qui", &exit_command) ("qu", &exit_command) ("q", &exit_command)
            ("load", &load_command) ("loa", &load_command) ("lo", &load_command) ("l", &load_command)
            ("set", &set_command) ("se", &set_command)
            ("show", &show_command) ("sho", &show_command) ("sh", &show_command);

        // Declared without a skipper, so characters between the quotes are
        // taken verbatim. Requires at least one character inside the quotes.
        quoted_string = '"' >> +~char_('"') >> '"';

        // nullary commands
        boot_command_ = eps;
        exit_command_ = eps;
        help_command_ = eps;
        show_command_ = eps;

        // non-nullary commands
        load_command_ = quoted_string;
        drive_        = char_("A-Z") >> ':';
        // attr(...) injects the canonical property name into the SetCmd.
        set_command_  = no_case[lit("drive")|"driv"|"dri"|"dr"] >> attr("DRIVE") >> drive_
                | no_case[ (lit("debug")|"debu"|"deb"|"de")     >> attr("DEBUG") >> on_off ]
                | no_case[ (lit("trace")|"trac"|"tra"|"tr"|"t") >> attr("TRACE") >> on_off ]
                ;

        BOOST_SPIRIT_DEBUG_NODES(
                (start)(lazy_command)
                (boot_command) (exit_command) (help_command) (show_command) (set_command) (load_command)
                (boot_command_)(exit_command_)(help_command_)(show_command_)(set_command_)(load_command_)
                (quoted_string)(drive_)
            )

        // Placeholders _4, _3, _2 are passed as What, Err_pos and Last.
        on_error<fail>(start, error_handler_(_4, _3, _2));
        on_error<fail>(lazy_command, error_handler_(_4, _3, _2));

        // Each rule stored in the symbol table forwards to its typed
        // counterpart, one assignment per rule.
        boot_command = boot_command_;
        exit_command = exit_command_;
        help_command = help_command_;
        load_command = load_command_;
        set_command  = set_command_;
        show_command = show_command_;
    }

  private:
    // Prints what was expected and the input from the error position to the end.
    struct error_handler_t {
        template <typename...> struct result { typedef void type; };

        void operator()(qi::info const &What, Iterator Err_pos, Iterator Last) const {
            std::cout << "Error! Expecting " << What << " here: \"" << std::string(Err_pos, Last) << "\"" << std::endl;
        }
    };

    boost::phoenix::function<error_handler_t> const error_handler_ = error_handler_t {};

    qi::rule<Iterator, Ast::Commands()> start;

    using Skipper = qi::blank_type;
    using CommandRule  = qi::rule<Iterator, Ast::Command(), Skipper>;

    qi::symbols<char, bool> on_off;
    qi::symbols<char, CommandRule const*> commands;

    // No skipper on these rules.
    qi::rule<Iterator, std::string()> quoted_string, drive_;

    qi::rule<Iterator, Ast::Command(), Skipper, qi::locals<CommandRule const*> > lazy_command;
    CommandRule boot_command, exit_command, help_command, load_command, set_command, show_command;

    // Typed rules, one per concrete command type.
    qi::rule<Iterator, Ast::BootCmd(), Skipper> boot_command_;
    qi::rule<Iterator, Ast::ExitCmd(), Skipper> exit_command_;
    qi::rule<Iterator, Ast::HelpCmd(), Skipper> help_command_;
    qi::rule<Iterator, Ast::LoadCmd(), Skipper> load_command_;
    qi::rule<Iterator, Ast::SetCmd(),  Skipper> set_command_;
    qi::rule<Iterator, Ast::ShowCmd(), Skipper> show_command_;
};

int main() {
    typedef std::string::const_iterator It;
    command_grammar<It> const commands;

    for (std::string const input : {
            "help",
            "set drive C:",
            "SET DRIVE C:",
            "loAD \"XYZ\"",
            "load \"anything \nat all\"",
            // multiline
            "load \"ABC\"\nhelp\n-he\n/H\nsh\nse t off\nse debug ON\nb\nq"
            })
    {
        std::cout << "----- '" << input << "' -----\n";
        It f = input.begin(), l = input.end();

        Ast::Commands parsed;
        bool result = parse(f, l, commands, parsed);

        if (result) {
            for (auto& cmd : parsed) {
                std::cout << "Parsed " << cmd << "\n";
            }
        } else {
            std::cout << "Parse failed\n";
        }

        if (f != l) {
            std::cout << "Remaining unparsed '" << std::string(f, l) << "'\n";
        }
    }
}

附言(Post-script)

Q. How do I annotate the parser to create an AST using utree?

  • 见上文

Q. How do I walk the utree after it is built, to discover what was parsed?

Q. I want to add a comment character, "!". So, how can I ignore everything after that - except when it occurs in a quoted string?

  • 只需将 Skipper 类型定义为一条规则,并让它解析注释,例如:

    // Skipper that matches a blank, or a '!' comment running through the end
    // of the line (or the end of input).
    // NOTE(review): the comment alternative also consumes the eol itself. The
    // grammar separates commands with eol (lazy_command % eol), so a command
    // that follows a trailing comment may not be reached - verify.
    // NOTE(review): rules declared with Skipper = qi::blank_type would need
    // that alias changed to this rule type as well - confirm.
    qi::rule<Iterator> my_skipper;
    my_skipper = blank | '!' >> *(char_ - eol) >> (eol|eoi);
    

    然后用 skip(my_skipper) 代替 skip(blank)。

Q. Why doesn't my error handler get called when I give it invalid input?

  • 因为你没有标记期望点(expectation point,即使用 operator> 而不是 operator>>)。如果不这样做,子表达式匹配失败时只会回溯,而不会触发错误处理程序。

Q. How can I make the command tokens case insensitive, but not change the contents of a quoted string?

  • 见上文