使用 Boost::Spirit 解析时出现段错误

Segfault when parsing with Boost::Spirit

我正在尝试使用 Boost::Spirit 编写一个语言解析器。我阅读了教程,并尝试用下面的代码来解析具有如下语法的函数定义:def myfunc(arg1 type1, arg2 type2 ...) return_type:

AST:

namespace ast {

/// Built-in value types of the language.
enum Type {
  BOOL,
  INT32,
  FLOAT32
};

/// A bare name (e.g. a function or argument name).
using Identifier = std::string;

/// An identifier paired with its type.
using TypedIdentifier = std::tuple<Identifier, Type>;

/// The list of typed arguments of a function.
using ArgList = std::vector<TypedIdentifier>;

/// A function definition: name, argument list and a trailing type
/// (the return type in the syntax being parsed).
using FunctionDef = std::tuple<Identifier, ArgList, Type>;

}  // namespace ast

解析器:

namespace parser {

// Grammar for a single identifier. Its attribute is ast::Identifier and it
// is declared with the ascii::space skipper.
struct Identifier
    : qi::grammar<string::iterator, ast::Identifier(), ascii::space_type> {
  Identifier() : Identifier::base_type(start) {
    // One leading character followed by any number of further characters.
    // NOTE(review): qi::char_ set strings are normally written without
    // brackets ("a-zA-Z_"); the '[' and ']' here are probably treated as
    // literal members of the set -- confirm against the Spirit docs.
    // NOTE(review): the rule is declared with a skipper, so whitespace
    // between the individual characters is presumably skipped too -- confirm.
    start = qi::char_("[a-zA-Z_]") >> *qi::char_("[a-zA-Z_0-9]");
  }
  qi::rule<string::iterator, ast::Identifier(), ascii::space_type> start;
};

// Symbol table that maps each type keyword to its ast::Type value.
struct Type : qi::symbols<char, ast::Type> {
  Type() {
    add("int32", ast::INT32)("float32", ast::FLOAT32)("bool", ast::BOOL);
  }
};

// Grammar for "<identifier> <type>", producing an ast::TypedIdentifier.
struct TypedIdentifier
    : qi::grammar<string::iterator, ast::TypedIdentifier(), ascii::space_type> {
  TypedIdentifier() : TypedIdentifier::base_type(start) {
    // Identifier() and Type() are temporaries that are destroyed at the end
    // of this statement, while `start` still refers to them. This dangling
    // reference is the cause of the segfault discussed in this post.
    start = Identifier() >> Type();
  }
  qi::rule<string::iterator, ast::TypedIdentifier(), ascii::space_type> start;
};

// Grammar for a whole function definition:
//   "def" <identifier> "(" <typed identifier> { "," <typed identifier> } ")" <type> ":"
struct FunctionDef
    : qi::grammar<string::iterator, ast::FunctionDef(), ascii::space_type> {
  FunctionDef() : FunctionDef::base_type(start) {
    // Same problem as in TypedIdentifier: every sub-parser constructed here
    // is a temporary that no longer exists once the constructor returns.
    start = "def" >> Identifier() >> "(" >> (TypedIdentifier() % ",") >> ")" >>
            Type() >> ":";
  }
  qi::rule<string::iterator, ast::FunctionDef(), ascii::space_type> start;
};
}

然后,我在尝试解析下面这个简单的代码片段时遇到了段错误。段错误最初是在解析函数定义时出现的,但经过调试我发现,在解析类型化标识符(TypedIdentifier)时就已经发生段错误了。

int main() {
  string foo("foo int32");
  auto begin = foo.begin();
  auto end = foo.end();
  ast::TypedIdentifier id;
  bool result = qi::phrase_parse(begin, end, parser::TypedIdentifier(),
                                 ascii::space, id);
  cout << "Parse " << (result ? "successful " : "failed ") << endl;
  return 0;
}

我测试了标识符和类型解析器,它们可以独立运行。 我还尝试定义全局语法而不是实例化新语法,但我也遇到了段错误。 我在这里做错了什么?

链接的答案¹确实指出了问题所在(语法规则中保存了对临时对象的引用)。

我建议您不必为每个产生式(production)都创建一个 grammar<> 实例。相反,把它们作为规则(rule)组织到同一个语法中会更高效(也更优雅):

Live On Coliru

#define BOOST_SPIRIT_DEBUG
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/std_tuple.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/optional/optional_io.hpp>

namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;

namespace ast {

    /// Built-in value types of the language.
    enum Type { BOOL, INT32, FLOAT32 };

    /// A bare name.
    using Identifier = std::string;
    /// An identifier paired with its type.
    using TypedIdentifier = std::tuple<Identifier, Type>;
    /// A list of typed identifiers.
    using ArgList = std::vector<TypedIdentifier>;
    /// Name, argument list and a trailing type.
    using FunctionDef = std::tuple<Identifier, ArgList, Type>;

    /// Streams the enumerator's name, or "?" for a value that is not one of
    /// the enumerators.
    std::ostream& operator<<(std::ostream& os, Type v) {
        const char* label = "?";
        switch (v) {
            case BOOL:    label = "BOOL";    break;
            case INT32:   label = "INT32";   break;
            case FLOAT32: label = "FLOAT32"; break;
        }
        return os << label;
    }

    /// Streams a std::tuple by converting it to a Boost.Fusion vector and
    /// streaming that.
    template <typename... Ts>
    std::ostream& operator<<(std::ostream& os, std::tuple<Ts...> const& v) {
        auto const asFusion = boost::fusion::as_vector(v);
        return os << asFusion;
    }

    /// Streams a std::vector as "{", then every element followed by a single
    /// space, then "}".
    template <typename T>
    std::ostream& operator<<(std::ostream& os, std::vector<T> const& v) {
        os << "{";
        for (auto it = v.begin(); it != v.end(); ++it) {
            os << *it << " ";
        }
        os << "}";
        return os;
    }
}

namespace parser {

    /// Grammar for one function definition of the form
    ///     def name(arg1 type1, arg2 type2, ...) return_type:
    /// producing an ast::FunctionDef. Whitespace between tokens is skipped
    /// with ascii::space.
    ///
    /// All productions are rules owned by this one grammar object, so no rule
    /// ever refers to a temporary sub-parser.
    template <typename Iterator>
    struct MyGrammarImpl : qi::grammar<Iterator, ast::FunctionDef(), ascii::space_type> {
        MyGrammarImpl() : MyGrammarImpl::base_type(functionDef)
        {
            // qi::char_ takes a bare character-set definition. Brackets are not part
            // of that syntax: "[a-zA-Z_]" would add '[' and ']' to the set and let
            // them appear inside identifiers.
            identifier      = qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z_0-9");
            typedIdentifier = identifier >> type;
            functionDef     = "def" >> identifier >> '(' >> (typedIdentifier % ",") >> ')' >> type >> ":";
            type            = type_;

            BOOST_SPIRIT_DEBUG_NODES((identifier)(typedIdentifier)(type)(functionDef))
        }
      private:
        // Rules that use the skipper.
        qi::rule<Iterator, ast::TypedIdentifier(), ascii::space_type> typedIdentifier;
        qi::rule<Iterator, ast::FunctionDef(),     ascii::space_type> functionDef;
        qi::rule<Iterator, ast::Type(),            ascii::space_type> type;

        // lexemes: declared without a skipper
        qi::rule<Iterator, ast::Identifier()> identifier;

        /// Symbol table mapping each type keyword to its ast::Type value.
        struct Type : qi::symbols<char, ast::Type> {
            Type() {
                add("int32", ast::INT32)
                   ("float32", ast::FLOAT32)
                   ("bool", ast::BOOL)
                   ;
            }
        };

        // Kept as a member so that the `type` rule refers to an object that
        // lives as long as the grammar itself.
        Type type_;
    };

    /// The grammar instantiated for parsing a const std::string.
    using MyGrammar = MyGrammarImpl<std::string::const_iterator>;
}

int main() {
    std::string const foo("def bar(foo int32 ) bool:");

    auto begin = foo.begin();
    auto end = foo.end();

    ast::FunctionDef def;

    // phrase_parse reports success as soon as the grammar matches a prefix of
    // the input and advances `begin` past what it consumed. Require that the
    // whole input was consumed, so trailing garbage is not reported as a
    // successful parse.
    bool const matched = qi::phrase_parse(begin, end, parser::MyGrammar(), ascii::space, def);
    bool const result  = matched && begin == end;

    std::cout << "Parse " << (result ? "successful " : "failed ") << std::endl;
    if (result)
        std::cout << def << "\n";
    else if (matched)
        std::cout << "Unparsed input: '" << std::string(begin, end) << "'\n";
}

打印:

Parse successful 
(bar {(foo INT32) } BOOL)

有调试信息:

<functionDef>
<try>def bar(foo int32 ) </try>
<identifier>
    <try>bar(foo int32 ) bool</try>
    <success>(foo int32 ) bool:</success>
    <attributes>[[b, a, r]]</attributes>
</identifier>
<typedIdentifier>
    <try>foo int32 ) bool:</try>
    <identifier>
    <try>foo int32 ) bool:</try>
    <success> int32 ) bool:</success>
    <attributes>[[f, o, o]]</attributes>
    </identifier>
    <type>
    <try> int32 ) bool:</try>
    <success> ) bool:</success>
    <attributes>[INT32]</attributes>
    </type>
    <success> ) bool:</success>
    <attributes>[[[f, o, o], INT32]]</attributes>
</typedIdentifier>
<type>
    <try> bool:</try>
    <success>:</success>
    <attributes>[BOOL]</attributes>
</type>
<success></success>
<attributes>[[[b, a, r], [[[f, o, o], INT32]], BOOL]]</attributes>
</functionDef>

¹ Internal Boost::Spirit code segfaults when parsing a composite grammar