使用 Spirit::Qi 解析时如何将所有参数推入结果向量?

How to push all the arguments into result vector when parsing with Spirit::Qi?

我有几个脚本语言的命令,所以我需要解析它们。在解析期间,我想检查语法是否正确以及命令的类型及其参数(每个脚本命令类型的参数数量可变,因此我使用 std::vector<std::string> 来存储它们)。

我遇到的问题是:解析时,无论实际存在多少个字符串,向量中都只包含第一个字符串。

此外,我必须对所有参数都使用 qi::as_string 指令,代码才能通过编译。

下面是我的项目的一个最小可运行示例:

//#define BOOST_SPIRIT_DEBUG
#include <boost/algorithm/string.hpp>
#include <boost/algorithm/string/predicate.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
#include <iostream>
#include <sstream>

namespace qi = boost::spirit::qi;

/// Kind of script line produced by the parser.
/// Enumerators keep their declaration order: NONE, CMD1, CMD2, FAIL.
enum class TYPE { NONE, CMD1, CMD2, FAIL };

/// One parsed script line: the command kind plus its arguments as strings.
struct Command {
    TYPE type{TYPE::NONE};             ///< Kind of command; NONE unless set otherwise.
    std::vector<std::string> args{};   ///< Arguments; their number varies per command kind.
};

// Result of parsing a whole script: a list of Command records.
using Commands = std::vector<Command>;

// Adapt Command as a Fusion sequence with exactly two elements: type, args.
BOOST_FUSION_ADAPT_STRUCT(Command, type, args)

// Qi grammar that turns a multi-line script into a list of Command records.
// Lines are separated by eol; each line is tried as none, cmd1, cmd2 and
// finally fail, in that order.
template <typename It>
class Parser : public qi::grammar<It, Commands()>
{
  private:
    // Per-line rules: each exposes a Command and uses the blank skipper.
    qi::rule<It, Command(), qi::blank_type> none, cmd1, cmd2, fail;
    // Top-level rule, declared without a skipper; it installs one itself
    // through skip(blank)[...].
    qi::rule<It, Commands()> start;

  public:
    Parser() : Parser::base_type(start)
    {
        using namespace qi;

        // Blank-only line: the &(eol | eoi) lookahead requires the line (or the
        // input) to end here. Yields type NONE and an empty argument vector.
        none = omit[*blank] >> &(eol | eoi)
            >> attr(TYPE::NONE)
            >> attr(std::vector<std::string>{});

        // CMD1(<text>): the single argument is every character up to ')' or a
        // line break; lexeme[] keeps the skipper from dropping inner blanks.
        cmd1 = lit("CMD1") >> '('
            >> attr(TYPE::CMD1)
            >> as_string[lexeme[+~char_(")\r\n")]] >> ')';

        // CMD2(<text>, <number>): first argument is text up to ',' / ')' / line
        // break, second is the source text matched by double_ (via raw[]).
        // NOTE: this sequence carries three attributes (type, string, string)
        // while Command is adapted with two members; the program's own output
        // check reports args.size() == 1 for CMD2 instead of the expected 2.
        cmd2 = lit("CMD2") >> '('
            >> attr(TYPE::CMD2)
            >> as_string[lexeme[+~char_(",)\r\n")]] >> ','
            >> as_string[raw[double_]] >> ')';


        // Fallback: consume the rest of the line and tag it FAIL.
        fail = omit[*~char_("\r\n")] //
            >> attr(TYPE::FAIL);

        // eol-separated list of lines with blanks skipped; `> eoi` is an
        // expectation point requiring the whole input to have been consumed.
        start = skip(blank)[(none | cmd1 | cmd2 | fail) % eol] > eoi;
    }
};

// Parses `text` as a script and returns the commands collected by the grammar.
// The success flag of qi::parse is deliberately ignored (the error throw is
// left disabled), so whatever was collected is returned as-is.
Commands parse(std::string text)
{
    using Iterator = boost::spirit::istream_iterator;

    // The grammar is built once and reused by every call.
    static const Parser<Iterator> grammar;

    std::istringstream stream(std::move(text));
    stream >> std::noskipws; // the stream must not skip white space itself

    Iterator cursor(stream);
    Iterator stop;

    Commands result;
    // throw std::runtime_error("command parse error") on failure is disabled
    static_cast<void>(qi::parse(cursor, stop, grammar, result));

    return result;
}

// Demo driver: parses a two-line script and prints how many commands were
// produced and how many arguments each of the two commands received.
int main()
{
    std::string test{
R"(CMD1(some ad hoc text)
CMD2(identity, 25.5))"};

    try {
        const auto commands = parse(test);
        std::cout << "elements: " << commands.size() << "\n";
        // at() instead of operator[]: if fewer than two commands were parsed,
        // this throws std::out_of_range (reported by the handler below) rather
        // than reading past the end of the vector.
        std::cout << "CMD1 args: " << commands.at(0).args.size() << "\n";
        std::cout << "CMD2 args: " << commands.at(1).args.size() << "\n"; // Error! Should be 2!!!!!

    } catch (std::exception const& e) {
        std::cout << e.what() << "\n";
    }
}

此外,这里是 Compiler Explorer 上的链接:https://godbolt.org/z/qM6KTcTTK

有人能帮我解决这个问题吗?提前致谢。

启用调试显示:https://godbolt.org/z/o3nvjz9bG

这样还不够清楚。让我们添加一个 arg 规则:

// Parsed command record. The nested aliases let grammar rules name the
// argument types as Command::Arg and Command::Args.
struct Command {
    enum TYPE { NONE, CMD1, CMD2, FAIL };
    using Arg  = std::string;
    using Args = std::vector<Arg>;

    TYPE type{NONE}; // kind of command, NONE by default
    Args args{};     // argument strings
};

// Rule for a single argument; exposes a string and is declared without a skipper.
qi::rule<It, Command::Arg()> arg;

// Blank-only line (lookahead for eol/eoi), tagged NONE. The explicit empty
// argument list is commented out here — presumably args then stay empty.
none = omit[*blank] >> &(eol | eoi)
    >> attr(Command::NONE)
    /*>> attr(Command::Args{})*/;

// An argument is either the source text of a number (raw[double_]) or a run
// of characters other than ',', ')', CR and LF.
arg  = raw[double_] | +~char_(",)\r\n");

// CMD1(<arg>)
cmd1 = lit("CMD1") >> attr(Command::CMD1) //
    >> '(' >> arg >> ')';

// CMD2(<arg>, <arg>): two separate `arg` parsers in sequence. The debug trace
// for `CMD2(identity, 25.5)` shows both matching, but only the first one
// appears in the resulting Command.
cmd2 = lit("CMD2") >> attr(Command::CMD2) //
    >> '(' >> arg >> ',' >> arg >> ')';

// Fallback: consume the rest of the line and tag it FAIL.
fail = omit[*~char_("\r\n")] //
    >> attr(Command::FAIL);

现在我们可以看到(https://godbolt.org/z/3Kqr3K41v):

  <cmd2>
    <try>CMD2(identity, 25.5)</try>
    <arg>
      <try>identity, 25.5)</try>
      <success>, 25.5)</success>
      <attributes>[[i, d, e, n, t, i, t, y]]</attributes>
    </arg>
    <arg>
      <try>25.5)</try>
      <success>)</success>
      <attributes>[[2, 5, ., 5]]</attributes>
    </arg>
    <success></success>
    <attributes>[[CMD2, [[i, d, e, n, t, i, t, y]]]]</attributes>
  </cmd2>

很明显,两个参数都被解析了,但只有一个被赋值。遗憾的是,你适配(adapt)的是一个只有两个成员的结构体,却让规则去解析一个包含 3 个元素的序列,这等于是在主动混淆该规则。

这是可以做到的,但你需要帮它一把(例如使用 transform_attribute、attr_cast<> 或单独的规则):

    // Single argument: number text, or characters up to ',' / ')' / line break.
    arg  = raw[double_] | +~char_(",)\r\n");
    // Comma-separated list of arguments, collected by a dedicated rule.
    args = arg % ',';

    // CMD1(<arg>): still a single `arg`.
    cmd1 = lit("CMD1") >> attr(Command::CMD1) //
        >> '(' >> arg >> ')';

    // CMD2(<args>): the arguments now go through `args`. The debug trace for
    // `CMD2(identity, 25.5)` shows both "identity" and " 25.5" (leading blank
    // kept) in the resulting Command.
    cmd2 = lit("CMD2") >> attr(Command::CMD2) //
        >> '(' >> args >> ')';

现在你得到:

  <cmd2>
    <try>CMD2(identity, 25.5)</try>
    <args>
      <try>identity, 25.5)</try>
      <arg>
        <try>identity, 25.5)</try>
        <success>, 25.5)</success>
        <attributes>[[i, d, e, n, t, i, t, y]]</attributes>
      </arg>
      <arg>
        <try> 25.5)</try>
        <success>)</success>
        <attributes>[[ , 2, 5, ., 5]]</attributes>
      </arg>
      <success>)</success>
      <attributes>[[[i, d, e, n, t, i, t, y], [ , 2, 5, ., 5]]]</attributes>
    </args>
    <success></success>
    <attributes>[[CMD2, [[i, d, e, n, t, i, t, y], [ , 2, 5, ., 5]]]]</attributes>
  </cmd2>

这也提示了一个明显的改进方向:简化语法(grammar):

    // Blank-only line: yields a ready-made Command{NONE, {}}.
    none  = omit[*blank] >> &(eol | eoi) >> attr(Command{Command::NONE, {}});
    // Fallback: consume the rest of the line and tag it FAIL.
    fail  = omit[*~char_("\r\n")] >> attr(Command::FAIL);

// One argument: number text, or characters up to ',' / ')' / line break.
arg   = raw[double_] | +~char_(",)\r\n");
// Parenthesised, comma-separated argument list.
args  = '(' >> arg % ',' >> ')';
// Case-insensitive command keyword followed by an optional argument list.
// NOTE(review): `type_` is presumably the keyword symbol table; the complete
// demo listing names it `type` — confirm which spelling is intended.
cmd   = no_case[type_] >> -args;

// NOTE(review): the complete demo listing uses (cmd|none|fail) here; `none`
// is not part of this alternative list — confirm.
start = skip(blank)[(cmd|fail) % eol] > eoi;

然后在解析完成之后,再对命令进行验证。

演示

Live On Compiler Explorer

//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
#include <iostream>

namespace qi = boost::spirit::qi;

/// A parsed script line: the command kind and its raw argument strings.
struct Command {
    enum Type { NONE, CMD1, CMD2, FAIL };
    using Arg  = std::string;
    using Args = std::vector<Arg>;

    Type type{NONE};
    Args args{};

    /// Streams the symbolic name of a Type; any value that is not one of the
    /// four enumerators is written as "???".
    friend std::ostream& operator<<(std::ostream& os, Type type) {
        static constexpr char const* names[] = {"NONE", "CMD1", "CMD2", "FAIL"};
        auto const index = static_cast<unsigned>(type);
        bool const known = index < sizeof(names) / sizeof(names[0]);
        return os << (known ? names[index] : "???");
    }

    /// Streams a command as NAME("arg1", "arg2", ...), each argument quoted
    /// with std::quoted and separated by ", ".
    friend std::ostream& operator<<(std::ostream& os, Command const& cmd) {
        os << cmd.type << "(";
        bool first = true;
        for (auto const& item : cmd.args) {
            if (!first)
                os << ", ";
            first = false;
            os << std::quoted(item);
        }
        return os << ")";
    }
};
// Result of parsing a whole script: a list of Command records.
using Commands = std::vector<Command>;

// Adapt Command as a Fusion sequence with exactly two elements: type, args.
BOOST_FUSION_ADAPT_STRUCT(Command, type, args)

// Qi grammar that turns a multi-line script into a list of Command records.
// Each eol-separated line is tried as cmd, then none, then fail.
template <typename It> struct Parser : qi::grammar<It, Commands()> {
    Parser() : Parser::base_type(start) {
        using namespace qi;

        // Blank-only line (lookahead for eol/eoi): yields Command{NONE, {}}.
        none  = omit[*blank] >> &(eol | eoi) >> attr(Command{Command::NONE, {}});
        // Fallback: consume the rest of the line and tag it FAIL.
        fail  = omit[*~char_("\r\n")] >> attr(Command::FAIL);

        // One argument: the source text of a number, or a run of characters
        // other than ',', ')', CR and LF.
        arg   = raw[double_] | +~char_(",)\r\n");
        // Parenthesised, comma-separated argument list.
        args  = '(' >> arg % ',' >> ')';
        // Case-insensitive keyword lookup followed by an optional argument list.
        cmd   = no_case[type] >> -args;

        // eol-separated list of lines with blanks skipped; `> eoi` is an
        // expectation point requiring the whole input to have been consumed.
        start = skip(blank)[(cmd|none|fail) % eol] > eoi;

        // Registers the rules for debug tracing (see BOOST_SPIRIT_DEBUG).
        BOOST_SPIRIT_DEBUG_NODES((start)(fail)(none)(cmd)(arg)(args))
    }

private:
    // Keyword table mapping command names to Command::Type. Keys are lower
    // case; the grammar matches them through no_case[].
    struct type_sym : qi::symbols<char, Command::Type> {
        type_sym() { this->add//
            ("cmd1", Command::CMD1)
            ("cmd2", Command::CMD2);
        }
    } type;
    // arg and args are declared without a skipper, so blanks inside an
    // argument are kept (the sample output shows " 25.5").
    qi::rule<It, Command::Arg()>            arg;
    qi::rule<It, Command::Args()>           args;
    // Per-line rules use the blank skipper.
    qi::rule<It, Command(), qi::blank_type> cmd, none, fail;
    qi::rule<It, Commands()>                start;
};

// Parses `text` as a script and returns the resulting commands.
// Throws std::runtime_error("command parse error") when qi::parse reports failure.
Commands parse(std::string const& text)
{
    using Iterator = std::string::const_iterator;

    // The grammar is built once and reused by every call.
    static const Parser<Iterator> grammar;

    Iterator cursor = text.begin();
    Iterator const stop = text.end();

    Commands result;
    bool const ok = qi::parse(cursor, stop, grammar, result);
    if (!ok)
        throw std::runtime_error("command parse error");

    return result;
}

int main()
{
    try {
        for (auto& cmd : parse(R"(
CMD1(some ad hoc text)
this is a bogus line
cmd2(identity, 25.5))"))
            std::cout << cmd << "\n";
    } catch (std::exception const& e) {
        std::cout << e.what() << "\n";
    }
}

输出

NONE()
CMD1("some ad hoc text")
FAIL()
CMD2("identity", " 25.5")