如果规则与解析器的属性类型不匹配,如何访问 Spirit 规则的属性?

How to access attribute of spirit rule, if attribute types of rule and parser don't match?

使用 boost::spirit::qi 进行解析时,可以在语义动作中通过 phoenix 调用函数,然后用 qi::_val 把结果赋给规则的属性——前提是规则的属性类型与所赋解析器的属性类型相匹配。如果类型不同,占位符 qi::_val 代表的就是顶层规则的属性。下面这个简短的可运行示例正是这种情况:规则 add 和 mul 的解析器返回 tuple<std::size_t, std::size_t>,而规则本身期望的是 vector<std::size_t>。

#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>

#include <iostream>
#include <string>
#include <vector>

/* useful abbreviations */
namespace ascii = boost::spirit::ascii;
namespace ph = boost::phoenix;
namespace qi = boost::spirit::qi;
namespace ql = qi::labels;


namespace parser
{

/* these functions are called by the semantic actions */
/* Builds the triple {a, b, a + b}: both operands followed by their sum. */
std::vector<std::size_t> add(std::size_t a, std::size_t b)
{
    std::vector<std::size_t> values;
    values.reserve(3);
    values.push_back(a);
    values.push_back(b);
    values.push_back(a + b);
    return values;
}

/* Builds the triple {a, b, a * b}: both operands followed by their product. */
std::vector<std::size_t> mul(std::size_t a, std::size_t b)
{
    std::vector<std::size_t> values;
    values.reserve(3);
    values.push_back(a);
    values.push_back(b);
    values.push_back(a * b);
    return values;
}

/* actual grammar */
template<typename Iterator>
class Parser : public boost::spirit::qi::grammar<Iterator, std::vector<std::size_t>(), boost::spirit::ascii::space_type>
{
public:
    Parser() : Parser::base_type(packed)
    {
        packed %= *(add | mul | val) >> qi::eoi;
        add = (qi::uint_ >> '+' >> qi::uint_ >> ';')[ qi::_val = ph::bind(&parser::add, ql::_1, ql::_2) ];
        mul = (qi::uint_ >> '*' >> qi::uint_ >> ';')[ qi::_val = ph::bind(&parser::mul, ql::_1, ql::_2) ];
        val %= qi::uint_ >> ';';
    }

private:
    using Rule = boost::spirit::qi::rule<Iterator, std::vector<std::size_t>(), boost::spirit::ascii::space_type>;
    Rule packed;
    Rule add;
    Rule mul;
    Rule val;
};

} /* namespace parser */

/* MAIN PROGRAM */
int main()
{
    using Iterator = std::string::const_iterator;
    parser::Parser<Iterator> parser;
    std::vector<std::size_t> result;
    
    std::string string = "1; 2*4; 5; 6; 7+9; 10;";
    Iterator it = string.begin(), end = string.end();
    qi::phrase_parse(it, end, parser, ascii::space, result);
    
    std::cout << "[ ";
    for(auto i: result) std::cout << i << ", ";
    std::cout << "]\n";
    
    return 0;
}

输出:

[ 7, 9, 16, 10, ]

而我真正想要的输出是:

[ 1, 2, 4, 8, 5, 6, 7, 9, 16, 10, ]

如您所见,赋值 qi::_val = ph::bind(&add, ql::_1, ql::_2) 直接覆盖了顶层规则属性中已有的全部内容。我知道可以通过向顶层规则的属性追加元素来绕过这个问题,但我希望保持代码整洁,让这件事由规则 packed 的解析器来完成。

有没有一种简单易行的方法,可以把对 parser::add 和 parser::mul 的调用结果写入它们各自规则的属性?

我通常的建议是不要将解析和求值混为一谈。这将使这变得更加简单,因为您可以在评估期间打印所有操作数。

请参阅下面的关注点分离部分。

无论如何都要这样做

你当然可以手动完成,让函数做你想做的事:

// Appends both operands and then their sum to the end of vec.
void add(Vec& vec, size_t a, size_t b) {
    vec.push_back(a);
    vec.push_back(b);
    vec.push_back(a + b);
}

// Appends both operands and then their product to the end of vec.
void mul(Vec& vec, size_t a, size_t b) {
    vec.push_back(a);
    vec.push_back(b);
    vec.push_back(a * b);
}

然后这样称呼它们:

// Every item is followed by ';'; the terminator is consumed here instead of
// inside the individual sub-rules.
packed %= *((add | mul | val) >> ';') >> qi::eoi;
// The actions pass the rule attribute (_val) and both parsed operands to the
// lazy functions add_/mul_.
add = (qi::uint_ >> '+' >> qi::uint_) [add_(_val, _1, _2)];
mul = (qi::uint_ >> '*' >> qi::uint_) [mul_(_val, _1, _2)];
// Exactly one unsigned integer. NOTE(review): repeat(1)[...] is presumably used
// so that a single uint_ yields a container-compatible attribute — confirm.
val = qi::repeat(1) [ qi::uint_ ];

Note a few minor tweaks, including the more elegant ph::function<> instead of ph::bind:

    // Lazy Phoenix function objects wrapping parser::add and parser::mul so they
    // can be invoked directly inside semantic actions.
    // NOTE(review): PackedF is not defined in this excerpt; presumably a callable
    // type compatible with void(Vec&, size_t, size_t) — confirm against the full sample.
    ph::function<PackedF> 
        add_{&parser::add},
        mul_{&parser::mul};

参见 Live On Coliru

输出

[ 1, 2, 4, 8, 5, 6, 7, 9, 16, 10, ]

更简单?

您可以直接在语义操作中进行所有插入:

// Appends lhs, rhs and lhs + rhs to the rule attribute; the comma-separated
// Phoenix actions are evaluated left to right.
add = (qi::uint_ >> '+' >> qi::uint_) [ (
     ph::push_back(_val, _1),
     ph::push_back(_val, _2),
     ph::push_back(_val, _1 + _2)
    )
];
// Same as above, but the third value is the product.
mul = (qi::uint_ >> '*' >> qi::uint_) [ (
     ph::push_back(_val, _1),
     ph::push_back(_val, _2),
     ph::push_back(_val, _1 * _2)
    )
];
// Exactly one unsigned integer.
val = qi::repeat(1) [ qi::uint_ ];

可以说这对于更动态的东西更有意义:

// Register one callable per operator token in the symbol table.
_binop.add("+", std::plus<size_t>{});
_binop.add("*", std::multiplies<size_t>{});
_binop.add("-", std::minus<size_t>{});
_binop.add("/", std::divides<size_t>{});
// The cast selects the double overload of std::pow.
_binop.add("^", static_cast<double(*)(double, double)>(&std::pow));

// _1 = left operand, _2 = callable looked up in _binop, _3 = right operand.
// Appends both operands and then the result of applying the callable to them.
bin = (qi::uint_ >> _binop >> qi::uint_) [ (
     ph::push_back(_val, _1),
     ph::push_back(_val, _3),
     ph::push_back(_val, ph::bind(_2, _1, _3))
    ) ];

这里,_binop 是一个符号表:

// Symbol table mapping an operator token to a binary function on size_t.
qi::symbols<char, std::function<size_t(size_t, size_t)> > _binop;

参见 Live On Coliru

#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>

#include <iostream>
#include <string>
#include <vector>

/* useful abbreviations */
namespace ascii = boost::spirit::ascii;
namespace ph = boost::phoenix;
namespace qi = boost::spirit::qi;
namespace ql = qi::labels;

using Vec = std::vector<size_t>;

namespace parser
{
    // Grammar for ';'-terminated items: either `lhs op rhs` (op looked up in a
    // symbol table) or a single unsigned number. All rules expose a Vec.
    template <typename Iterator>
    class Parser : public qi::grammar<Iterator, Vec(), ascii::space_type> {
        using Rule     = qi::rule<Iterator, Vec(), ascii::space_type>;
        using BinaryFn = std::function<size_t(size_t, size_t)>;

        Rule packed, bin, val;
        qi::symbols<char, BinaryFn> _binop;

      public:
        Parser() : Parser::base_type(packed) {
            using namespace ql;

            // Operator table: token -> implementation.
            _binop.add
                ("+", std::plus<size_t>{})
                ("*", std::multiplies<size_t>{})
                ("-", std::minus<size_t>{})
                ("/", std::divides<size_t>{})
                ("^", static_cast<double(*)(double, double)>(&std::pow));

            packed %= *((bin | val) >> ';') >> qi::eoi;

            // _1 = lhs, _2 = callable from _binop, _3 = rhs.
            bin = (qi::uint_ >> _binop >> qi::uint_)
                [ (
                    ph::push_back(_val, _1),
                    ph::push_back(_val, _3),
                    ph::push_back(_val, ph::bind(_2, _1, _3))
                ) ];

            val = qi::repeat(1) [ qi::uint_ ];
        }
    };

} /* namespace parser */

/* MAIN PROGRAM */
int main() {
    using Iterator = std::string::const_iterator;
    parser::Parser<Iterator> parser;
    Vec result;

    std::string string = "1; 2*4; 5; 6; 7+9; 10;";
    Iterator it = string.begin(), end = string.end();
    qi::phrase_parse(it, end, parser, ascii::space, result);

    std::cout << "[ ";
    for (auto i : result)
        std::cout << i << ", ";
    std::cout << "]\n";
}

打印(再次):

[ 1, 2, 4, 8, 5, 6, 7, 9, 16, 10, ]

更简单?

您可以通过重写规则来避免回溯值:

// A ';'-separated list of expressions that must consume the whole input.
// qi::eoi (end of input) is required here, not qi::eol (end of line): the
// sample input contains no newline, and with a whitespace skipper like the one
// used in the other listings a newline would be skipped before qi::eol could
// match it, so the parse could never succeed.
expr_list = expr % ';' >> qi::eoi;

// A single operand; it is appended to the enclosing rule's attribute as soon
// as it is parsed. qi::copy keeps the parser expression safe to store in `auto`.
auto val = qi::copy(qi::uint_ [ ph::push_back(_val, _1) ]);
// Lazy expression for the second-to-last element of the attribute. When the
// action below runs, the right operand has already been pushed, so this refers
// to the left operand (or to the previous result in a chain).
auto penultimate = *(ph::rbegin(_val)+1);

// _1 = callable from _binop, _2 = right operand.
expr = val >> *(_binop >> val) 
    [ ph::push_back(_val, ph::bind(_1, penultimate, _2)) ];

这样表达能力更强:

std::string const string = "1; 2*4; 2^7-1; 4-1*2";

结果:Live On Coliru:

[ 1, 2, 4, 8, 2, 7, 128, 1, 127, 4, 1, 3, 2, 6, ]

如您所见,这越来越倾向于通用算术表达式求值。如果你想要这样做,请确保你执行了执行运算符优先级规则的额外步骤。我在这个网站上有很多答案展示了一些方法。

关注点分离

因此,如果您不想“无论如何都这样做”,我会先解析为一个元素列表:

// AST types the parser produces; no evaluation happens at parse time.
namespace parser {
    using Operand = std::size_t;
    // One binary operation on two literal operands.
    // NOTE(review): to be usable as a rule attribute this struct presumably has
    // to be adapted with BOOST_FUSION_ADAPT_STRUCT, which this excerpt does not show.
    struct Binary {
        Operand lhs;
        char operator_;   // the operator character as it appeared in the input
        Operand rhs;
    };
    // An element is either a plain number or a binary operation.
    using Element = boost::variant<Operand, Binary>;
    using Elements = std::vector<Element>;
    // Makes Fusion's stream operator visible in this namespace (used when
    // printing elements).
    using boost::fusion::operator<<;
} // namespace parser

这很简单,因为我们不再需要单个语义动作,也不需要评估任何东西:

// Grammar without semantic actions: parses a ';'-separated list of elements
// (binary operation or plain number) straight into Elements.
template <typename Iterator>
class Parser : public qi::grammar<Iterator, Elements(), qi::space_type> {
  public:
    Parser() : Parser::base_type(packed) {
        // `x % ';'` stops in front of a trailing ';', which would make qi::eoi
        // (and therefore the whole parse) fail on input such as "1; 2*4;".
        // Accept one optional terminator after the last element.
        packed = (bin | qi::uint_) % ';' >> -qi::lit(';') >> qi::eoi;
        bin = qi::uint_ >> qi::char_("-+*/^") >> qi::uint_;
    }

  private:
    qi::rule<Iterator, Elements(), qi::space_type> packed;
    qi::rule<Iterator, Binary(), qi::space_type> bin;
};

这就是我所说的简单。现在,使用它也同样简单:

// Receives the parsed AST.
parser::Elements elements;

std::string string = "1; 2*4; 5; 6; 7+9; 10;";
Iterator it = string.begin(), end = string.end();
// NOTE: the bool result of phrase_parse is ignored here, so a failed or
// partial parse goes unnoticed.
qi::phrase_parse(it, end, parser, qi::space, elements);

// Print each element followed by "; ".
for (auto& element : elements)
    std::cout << element << "; ";

这会打印

1; (2 * 4); 5; 6; (7 + 9); 10; 

如您所见,我们知道输入已解析,我们可以对其进行评估。事实上,我们可以用不同的方式评估相同的元素:

// Flattens the parsed elements into a list of values.
// A plain operand contributes itself; a binary operation contributes its
// result, preceded by both operands when include_literals is true.
std::vector<Operand> evaluate(Elements const& elements, bool include_literals = true) {
    // Variant visitor that accumulates the output values.
    struct Collector {
        bool literals;
        std::vector<Operand> result;

        void operator()(Operand const& oper) { result.push_back(oper); }

        void operator()(Binary const& bin) {
            if (literals) {
                result.push_back(bin.lhs);
                result.push_back(bin.rhs);
            }
            // An operator character without a case contributes no result value.
            switch (bin.operator_) {
                case '+': result.push_back(bin.lhs + bin.rhs); break;
                case '-': result.push_back(bin.lhs - bin.rhs); break;
                case '*': result.push_back(bin.lhs * bin.rhs); break;
                case '/': result.push_back(bin.lhs / bin.rhs); break;
                case '^': result.push_back(static_cast<Operand>(std::pow(bin.lhs, bin.rhs))); break;
            }
        }
    };

    Collector collector;
    collector.literals = include_literals;

    for (Element const& element : elements) {
        apply_visitor(collector, element);
    }

    return collector.result;
}

This evaluation is pretty straightforward, but it uses a technique that might be new: the variant visitor. The standard library's counterpart of boost::apply_visitor is std::visit.

现在我们可以选择是否包含文字操作数:

std::cout << "\nwithout literals [ ";
for (auto i : evaluate(elements, false)) std::cout << i << ", ";
std::cout << "]\n";

std::cout << "with literals [ ";
for (auto i : evaluate(elements, true)) std::cout << i << ", ";
std::cout << "]\n";

输出

without literals [ 1, 8, 5, 6, 16, 10, ]
with literals [ 1, 2, 4, 8, 5, 6, 7, 9, 16, 10, ]

其他好处

这种方法的另一个(不那么明显的)好处是,它让实现某种运算符优先级变得更容易。这有点离题了,不过请允许我稍作调整并给出一个示例求值(还没有到让解析器了解优先级的程度):

Live On Coliru

//#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/spirit/include/qi.hpp>

#include <cmath>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

/* useful abbreviations */
namespace qi = boost::spirit::qi;

// Recursive AST: an element is either a number or a binary operation whose
// operands are themselves elements.
namespace parser {
    using Operand = std::size_t;
    struct Binary;
    // Binary is still incomplete here, so it is held through recursive_wrapper.
    using Element = boost::variant<Operand, boost::recursive_wrapper<Binary> >;

    struct Binary {
        Element lhs;
        char operator_;   // the operator character as it appeared in the input
        Element rhs;
    };

    using Elements = std::vector<Element>;
    // Makes Fusion's stream operator visible in this namespace (used when
    // printing elements).
    using boost::fusion::operator<<;
} // namespace parser
// Adapt Binary as a Fusion sequence (lhs, operator_, rhs) so the `bin` rule can
// fill it member by member.
BOOST_FUSION_ADAPT_STRUCT(parser::Binary, lhs, operator_, rhs)

namespace parser {
    /* actual grammar */
    template <typename Iterator>
    class Parser : public qi::grammar<Iterator, Elements(), qi::space_type> {
      public:
        Parser() : Parser::base_type(packed) {
            packed = elem % ';' >> qi::eoi;
            elem   = bin | qi::uint_;
            bin    = '(' >> elem >> qi::char_("-+*/^") >> elem >> ')';

            BOOST_SPIRIT_DEBUG_NODES((packed)(elem)(bin))
        }

      private:
        qi::rule<Iterator, Elements(), qi::space_type> packed;
        qi::rule<Iterator, Element(), qi::space_type> elem;
        qi::rule<Iterator, Binary(), qi::space_type> bin;
    };

    Elements parse(std::string const& input) {
        using Iterator = std::string::const_iterator;
        static const parser::Parser<Iterator> parser;
        Elements elements;
        qi::phrase_parse(input.begin(), input.end(), parser, qi::space, elements);
        // TODO error handling please
        return elements;
    }

    std::vector<Operand> evaluate(Elements const& elements) {
        struct {
            Operand operator()(Element const& el) { return boost::apply_visitor(*this, el); }
            Operand operator()(Operand const& oper) { return oper; }
            Operand operator()(Binary const& bin) { 
                auto lhs = operator()(bin.lhs);
                auto rhs = operator()(bin.rhs);
                switch(bin.operator_) {
                    case '+': return operator()(lhs + rhs);
                    case '-': return operator()(lhs - rhs);
                    case '*': return operator()(lhs * rhs);
                    case '/': return operator()(lhs / rhs);
                    case '^': return operator()(std::pow(lhs, rhs));
                }
                throw std::invalid_argument("operator not implemented");
            }
        } vis;

        std::vector<Operand> result;
        for (auto& el : elements) {
            result.push_back(apply_visitor(vis, el));
        }

        return result;
    }

} /* namespace parser */

/* MAIN PROGRAM */
// Parses a sample with nested operations, prints the parsed elements and then
// their evaluated values.
int main() {
    auto const elements = parser::parse("1; ((2*4)+7); (2*(4+7)); ((7-4)^4);");

    for (auto const& element : elements) {
        std::cout << element << "; ";
    }

    std::cout << "\nevaluated ";
    auto const values = evaluate(elements);
    for (auto const value : values) {
        std::cout << value << ", ";
    }
}

输出

1; ((2 * 4) + 7); (2 * (4 + 7)); ((7 - 4) ^ 4);
evaluated 1, 15, 22, 81,