使用 Spirit 的 qi::on_success 回调在规则匹配后设置字段

Using Spirit qi::on_success callbacks to set fields after a rule match

我正在尝试使用 qi::on_success 回调,在某条规则匹配时设置一个字段。下面的代码是在一个已有示例的基础上稍作修改而来的,不过我对规则/AST 类所做的轻微改动使它无法再识别 _rule_name。我的意图已经写在下面代码的注释中:如果匹配的是 _literal 规则,我想把字段 term_type 设置为 TermType::literal;如果匹配的是 _rule_name 规则,则设置为 TermType::rule_name。

//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted.hpp>
#include <iomanip>

namespace qi = boost::spirit::qi;

/// AST types filled in by the BNF grammar.
namespace Ast {
    /// Discriminator telling how the text of a Term is to be interpreted.
    enum class TermType {
        literal,   ///< quoted terminal text
        rule_name  ///< reference to a rule, written between '<' and '>'
    };

    /// One symbol of a rule: its text plus the kind of symbol it is.
    struct Term {
        std::string data;
        // Default member initializer so that a default-constructed Term never
        // carries an indeterminate discriminator (reading one is undefined
        // behaviour). `literal` is also what value-initialization would give.
        TermType term_type = TermType::literal;
    };

    /// A sequence of terms (one alternative of an expression).
    using List = std::list<Term>;
    /// The alternatives making up the right-hand side of a rule.
    using Expression = std::list<List>;

    /// A single production: `name ::= rhs`.
    struct Rule {
        Term name; // lhs
        Expression rhs;
    };

    /// A whole grammar: the rules in the order they were parsed.
    using Syntax = std::list<Rule>;
}
BOOST_FUSION_ADAPT_STRUCT(Ast::Term, data)
BOOST_FUSION_ADAPT_STRUCT(Ast::Rule, name, rhs)

namespace Parser {
    /// Qi grammar that parses BNF text into an Ast::Syntax.
    ///
    /// @tparam Iterator  iterator type over the input characters.
    template<typename Iterator>
    struct BNF : qi::grammar<Iterator, Ast::Syntax()> {
        BNF() : BNF::base_type(start) {
            using namespace qi;
            _blank = blank;
            // Besides blanks, a line break is skipped whenever it is NOT followed
            // by something that parses as a rule, so a rule may continue on the
            // next line.
            _skipper = blank | (eol >> !skip(_blank.alias())[_rule]);
            // Rules are separated by one or more line breaks.
            start = skip(_skipper.alias())[_rule % +eol];

            _rule = _rule_name >> "::=" >> _expression;
            _expression = _list % '|';
            _list = +(_literal | _rule_name);
            _literal = '"' >> *(_character - '"') >> '"'
                    | "'" >> *(_character - "'") >> "'";
            // The backslash has to be spelled "\\" inside the C++ literal; a lone
            // backslash in front of '^' is not a valid escape sequence and would
            // leave the backslash out of the character set.
            _character = alnum | char_("\"'| !#$%&()*+,./:;>=<?@]\\^_`{}~[-");
            _rule_name = '<' >> (alpha >> *(alnum | char_('-'))) >> '>';

            BOOST_SPIRIT_DEBUG_NODES(
                    (_rule)(_expression)(_list)(_literal)
                            (_character)
                            (_rule_name))
        }

        /* Intended behaviour (pseudo-code, not implemented):

        qi::on_success(_term, setTermTypeHandler());

        setTermTypeHandler(){
             if term is literal
                term.term_type = TermType::literal
            else
                term.term_type = TermType::rule_name
        }
        */

    private:
        // The skipper is itself a rule so it can be referenced via alias().
        using Skipper = qi::rule<Iterator>;
        Skipper _skipper, _blank;

        qi::rule<Iterator, Ast::Syntax()> start;
        qi::rule<Iterator, Ast::Rule(), Skipper> _rule;
        qi::rule<Iterator, Ast::Expression(), Skipper> _expression;
        qi::rule<Iterator, Ast::List(), Skipper> _list;
        // lexemes (declared without a skipper)
        qi::rule<Iterator, Ast::Term()> _literal;
        qi::rule<Iterator, Ast::Term()> _rule_name;
        //  qi::rule<Iterator, std::string()>     _literal;
        qi::rule<Iterator, char()> _character;
    };
}

int main() {
    Parser::BNF<std::string::const_iterator> const parser;

    std::string const input = R"(<code>   ::=  <letter><digit> | <letter><digit><code>
<letter> ::= "a" | "b" | "c" | "d" | "e"
           | "f" | "g" | "h" | "i"
<digit>  ::= "0" | "1" | "2" | "3" |
             "4"
    )";

    auto it = input.begin(), itEnd = input.end();

    Ast::Syntax syntax;
    if (parse(it, itEnd, parser, syntax)) {
        for (auto &rule : syntax) {
            std::cout << rule.name.data << " ::= ";
            std::string sep;
            for (auto &list : rule.rhs) {
                std::cout << sep;
                for (auto &term: list) { std::cout << term.data; }
                sep = " | ";
            };
            std::cout << "\n";
        }
    } else {
        std::cout << "Failed\n";
    }

    if (it != itEnd)
        std::cout << "Remaining: " << std::quoted(std::string(it, itEnd)) << "\n";
}

由于您的结构 Term 已经变成了用元组 (std::string, TermType) 模拟的 Name/Literal 可区分联合(discriminated union),我会让 _literal 和 _rule_name 都只生成一个字符串,然后用 qi::attr 把 TermType 附加上去。

所以,

// A parsed symbol: its text together with a discriminator for its kind.
struct Term {
    // The symbol's text.
    std::string data;
    // Whether `data` is a literal or a rule name.
    TermType term_type;
};

用 Fusion 适配(adapt)这两个成员:

BOOST_FUSION_ADAPT_STRUCT(Ast::Term, data, term_type)

声明相关规则:

// _term synthesizes the whole Term; the two lexeme rules only synthesize text.
qi::rule<Iterator, Ast::Term()>   _term;
qi::rule<Iterator, std::string()> _literal;
qi::rule<Iterator, std::string()> _rule_name;

初始化为

_list       = +_term;
// Each branch appends its own discriminator with attr(), so the
// (string, TermType) pair maps onto the two adapted members of Term.
_term       = _literal >> attr(Ast::TermType::literal)
            | _rule_name >> attr(Ast::TermType::rule_name);
_literal    = '"' >> *(_character - '"') >> '"'
            | "'" >> *(_character - "'") >> "'";

// The backslash has to be spelled "\\" inside the C++ literal; a lone backslash
// in front of '^' is not a valid escape sequence and would drop it from the set.
_character = alnum | char_("\"'| !#$%&()*+,./:;>=<?@]\\^_`{}~[-");
_rule_name = '<' >> (alpha >> *(alnum | char_('-'))) >> '>';

这符合我的信条,即您应该尽量避免语义操作 (Boost Spirit: "Semantic actions are evil"?) 并将复杂性降至最低。

on_success

我认为这里使用 on_success 的想法是不明智的,因为它适用于非上下文相关的操作(比如将源位置绑定到每个 AST 节点,无论类型如何)。

在这种情况下,您明确地想要添加不同信息(变体鉴别器),因此您最好将其注入它适用的解析器表达式的特定分支。

旁注?

你似乎把 Rule::name 的类型提升成了 Term(而不是 std::string,之前它是 Name),这样反而给自己增加了复杂度。

规则的名称不能是任何其他文字,所以我建议

  1. 要么将其还原为 std::string(去掉 Name 原本携带的额外类型信息)

    // A single production: `name ::= rhs`, with the name held as plain text.
    struct Rule {
        std::string name; // left-hand side: the name being defined
        Expression rhs;
    };
    
  2. 让 _rule_name 直接合成 Term(在该规则内部一并给出 TermType):https://godbolt.org/z/Kbb9dP

  3. 两全其美:保留 Name,并为 Term 提供一个接受 Name 的转换构造函数:

    // Converting constructor: builds a Term from a Name, taking over its text
    // and fixing the discriminator to rule_name.
    explicit Term(Name other)
        : data(std::move(other))
        , term_type(TermType::rule_name)
    { }
    

使用 ADT 进行文学编程

请注意,丢掉 Name 类型并非没有代价,因为输出会变得完全不对。我建议采用最后一种方法(上面的第 3 条),并为您自己的变体仿真类型添加自定义的 operator<<:

// Streams a Term according to its discriminator: rule names are streamed as a
// Name, literals through std::quoted, and any other discriminator value as "?".
friend std::ostream& operator<<(std::ostream& os, Term const& term) {
    switch(term.term_type) {
        case TermType::rule_name: return os << Name(term.data);
        case TermType::literal:   return os << std::quoted(term.data);
        default:                  return os << "?";
    }
}

现在您可以享受您自己的变体类型并再次正确输出:

Live On Compiler Explorer

//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted.hpp>
#include <iomanip>

namespace qi = boost::spirit::qi;

/// AST types filled in by the BNF grammar.
namespace Ast {
    /// A rule name: a std::string with its own type so that it can be
    /// streamed differently from ordinary text.
    struct Name : std::string {
        using std::string::string;
        using std::string::operator=;
        explicit Name(std::string s) : std::string(std::move(s)) {}

        /// Streams the name wrapped in angle brackets.
        friend std::ostream& operator<<(std::ostream& os, Name const& n) {
            // Stream the std::string base rather than c_str(): the cast avoids
            // recursing into this operator and keeps characters that follow an
            // embedded NUL, which c_str() streaming would cut off.
            return os << '<' << static_cast<std::string const&>(n) << '>';
        }
    };

    /// Discriminator telling how the text of a Term is to be interpreted.
    enum class TermType { literal, rule_name };

    /// One symbol of a rule: its text plus the kind of symbol it is.
    struct Term {
        std::string data;
        // Default member initializer: the defaulted constructor below would
        // otherwise leave the discriminator indeterminate, and operator<<
        // switches on it.
        TermType term_type = TermType::literal;

        Term() = default;

        /// Builds a rule-name Term from a Name, taking over its text.
        explicit Term(Name other)
            : data(std::move(other))
            , term_type(TermType::rule_name)
        { }

        /// Streams rule names as a Name, literals through std::quoted and any
        /// other discriminator value as "?".
        friend std::ostream& operator<<(std::ostream& os, Term const& term) {
            switch(term.term_type) {
                case TermType::rule_name: return os << Name(term.data);
                case TermType::literal:   return os << std::quoted(term.data);
                default:                  return os << "?";
            }
        }
    };

    /// A sequence of terms (one alternative of an expression).
    using List = std::list<Term>;
    /// The alternatives making up the right-hand side of a rule.
    using Expression = std::list<List>;

    /// A single production: `name ::= rhs`.
    struct Rule {
        Name name; // lhs
        Expression rhs;
    };

    /// A whole grammar: the rules in the order they were parsed.
    using Syntax = std::list<Rule>;
}
BOOST_FUSION_ADAPT_STRUCT(Ast::Term, data, term_type)
BOOST_FUSION_ADAPT_STRUCT(Ast::Rule, name, rhs)

namespace Parser {
    /// Qi grammar that parses BNF text into an Ast::Syntax.
    ///
    /// @tparam Iterator  iterator type over the input characters.
    template <typename Iterator>
    struct BNF : qi::grammar<Iterator, Ast::Syntax()> {
        BNF()
            : BNF::base_type(start)
        {
            using namespace qi;
            // clang-format off
            _blank      = blank;
            // Besides blanks, a line break is skipped whenever it is NOT followed
            // by something that parses as a rule, so a rule may continue on the
            // next line.
            _skipper    = blank | (eol >> !skip(_blank.alias()) [ _rule ]);
            // Rules are separated by one or more line breaks.
            start       = skip(_skipper.alias()) [ _rule % +eol ];

            _rule       = _rule_name >> "::=" >> _expression;
            _expression = _list % '|';
            _list       = +_term;
            // A literal gets its discriminator appended via attr(); a rule name
            // is synthesized as a Name and handed to Term as such.
            _term       = _literal >> attr(Ast::TermType::literal)
                        | _rule_name;
            _literal    = '"' >> *(_character - '"') >> '"'
                        | "'" >> *(_character - "'") >> "'";

            // The backslash has to be spelled "\\" inside the C++ literal; a lone
            // backslash in front of '^' is not a valid escape sequence and would
            // leave the backslash out of the character set.
            _character = alnum | char_("\"'| !#$%&()*+,./:;>=<?@]\\^_`{}~[-");
            _rule_name = '<' >> qi::raw[ (alpha >> *(alnum | char_('-'))) ] >> '>';

            // clang-format on
            BOOST_SPIRIT_DEBUG_NODES(
                (_rule)(_expression)(_list)(_literal)(_character)(_rule_name))
        }

      private:
        // The skipper is itself a rule so it can be referenced via alias().
        using Skipper = qi::rule<Iterator>;
        Skipper _skipper, _blank;

        qi::rule<Iterator, Ast::Syntax()>     start;
        qi::rule<Iterator, Ast::Rule(),       Skipper> _rule;
        qi::rule<Iterator, Ast::Expression(), Skipper> _expression;
        qi::rule<Iterator, Ast::List(),       Skipper> _list;
        // lexemes (declared without a skipper)
        qi::rule<Iterator, Ast::Term()>   _term;
        qi::rule<Iterator, std::string()> _literal;
        qi::rule<Iterator, Ast::Name()>   _rule_name;
        qi::rule<Iterator, char()>        _character;
    };
}

int main() {
    Parser::BNF<std::string::const_iterator> const parser;

    std::string const input = R"(<code>   ::=  <letter><digit> | <letter><digit><code>
<letter> ::= "a" | "b" | "c" | "d" | "e"
           | "f" | "g" | "h" | "i"
<digit>  ::= "0" | "1" | "2" | "3" |
             "4"
    )";

    auto it = input.begin(), itEnd = input.end();

    Ast::Syntax syntax;
    if (parse(it, itEnd, parser, syntax)) {
        for (auto &rule : syntax) {
            std::cout << rule.name << " ::= ";
            std::string sep;
            for (auto &list : rule.rhs) {
                std::cout << std::exchange(sep, " | ");
                for (auto &term: list) { std::cout << term; }
            };
            std::cout << "\n";
        }
    } else {
        std::cout << "Failed\n";
    }

    if (it != itEnd)
        std::cout << "Remaining: " << std::quoted(std::string(it, itEnd)) << "\n";
}

输出:

<code> ::= <letter><digit> | <letter><digit><code>
<letter> ::= "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i"
<digit> ::= "0" | "1" | "2" | "3" | "4"