使用 Boost::Spirit 解析时出现段错误
Segfault when parsing with Boost::Spirit
我正在尝试使用 Boost::Spirit 编写一个语言解析器。我阅读了教程,并尝试用下面的代码来解析具有如下语法的函数定义:def myfunc(arg1 type1, arg2 type2 ...) return_type:
AST:
namespace ast {

/// Value types that can appear in a declaration.
enum Type {
    BOOL,
    INT32,
    FLOAT32
};

/// A bare name (function name or argument name).
using Identifier = std::string;

/// A name together with its declared type.
using TypedIdentifier = std::tuple<Identifier, Type>;

/// The argument list of a function, in declaration order.
using ArgList = std::vector<TypedIdentifier>;

/// A function definition: (name, arguments, return type).
using FunctionDef = std::tuple<Identifier, ArgList, Type>;

}  // namespace ast
解析器:
// Parsers from the question: one qi::grammar per production.
// NOTE: this is the version that crashes. The composite grammars below build
// their rules from temporary sub-parser objects (see the BUG comments).
namespace parser {
// Grammar producing an ast::Identifier: one character from the first set
// followed by zero or more characters from the second set.
// NOTE(review): the rule is declared with the ascii::space skipper, so this is
// not a lexeme; the reworked grammar later in this document declares its
// identifier rule without a skipper.
struct Identifier
: qi::grammar<string::iterator, ast::Identifier(), ascii::space_type> {
Identifier() : Identifier::base_type(start) {
start = qi::char_("[a-zA-Z_]") >> *qi::char_("[a-zA-Z_0-9]");
}
qi::rule<string::iterator, ast::Identifier(), ascii::space_type> start;
};
// Symbol table mapping the type keywords to their ast::Type value.
struct Type : qi::symbols<char, ast::Type> {
Type() {
add("int32", ast::INT32)("float32", ast::FLOAT32)("bool", ast::BOOL);
}
};
// Grammar producing an ast::TypedIdentifier: an identifier followed by a type.
struct TypedIdentifier
: qi::grammar<string::iterator, ast::TypedIdentifier(), ascii::space_type> {
TypedIdentifier() : TypedIdentifier::base_type(start) {
// BUG: Identifier() and Type() are temporaries that are destroyed at the end
// of this statement, while `start` keeps referring to them. Parsing through
// `start` afterwards is what produces the reported segfault.
start = Identifier() >> Type();
}
qi::rule<string::iterator, ast::TypedIdentifier(), ascii::space_type> start;
};
// Grammar producing an ast::FunctionDef:
//   "def" name "(" typed-identifier { "," typed-identifier } ")" type ":"
struct FunctionDef
: qi::grammar<string::iterator, ast::FunctionDef(), ascii::space_type> {
FunctionDef() : FunctionDef::base_type(start) {
// BUG: same problem as in TypedIdentifier: every sub-parser here is a
// temporary that no longer exists once the assignment has finished.
start = "def" >> Identifier() >> "(" >> (TypedIdentifier() % ",") >> ")" >>
Type() >> ":";
}
qi::rule<string::iterator, ast::FunctionDef(), ascii::space_type> start;
};
}
随后,在尝试解析下面这段简单的代码片段时,我遇到了段错误。段错误最初出现在解析函数定义时,但经过调试我发现,仅解析带类型的标识符时就已经会发生段错误。
int main() {
string foo("foo int32");
auto begin = foo.begin();
auto end = foo.end();
ast::TypedIdentifier id;
bool result = qi::phrase_parse(begin, end, parser::TypedIdentifier(),
ascii::space, id);
cout << "Parse " << (result ? "successful " : "failed ") << endl;
return 0;
}
我测试了标识符和类型解析器,它们可以独立运行。
我还尝试定义全局语法而不是实例化新语法,但我也遇到了段错误。
我在这里做错了什么?
链接的答案¹确实显示出了问题(语法规则中有对临时变量的引用)。
我建议您不必为每个产生式都创建一个 grammar<> 实例。相反,把它们作为规则组合到同一个语法中会更高效(也更优雅):
#define BOOST_SPIRIT_DEBUG
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/std_tuple.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/optional/optional_io.hpp>
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace ast {

/// Value types that can appear in a declaration.
enum Type { BOOL, INT32, FLOAT32 };

/// A bare name (function name or argument name).
using Identifier = std::string;
/// A name together with its declared type.
using TypedIdentifier = std::tuple<Identifier, Type>;
/// The argument list of a function, in declaration order.
using ArgList = std::vector<TypedIdentifier>;
/// A function definition: (name, arguments, return type).
using FunctionDef = std::tuple<Identifier, ArgList, Type>;

/// Streams the enumerator name of `v`, or "?" for a value outside the enum.
std::ostream& operator<<(std::ostream& os, Type v) {
    const char* label = "?";
    switch (v) {
        case BOOL:    label = "BOOL";    break;
        case INT32:   label = "INT32";   break;
        case FLOAT32: label = "FLOAT32"; break;
    }
    return os << label;
}

/// Streams a std::tuple by converting it to a fusion vector and streaming that.
template <typename... Ts> std::ostream& operator<<(std::ostream& os, std::tuple<Ts...> const& v) {
    os << boost::fusion::as_vector(v);
    return os;
}

/// Streams a std::vector as "{e1 e2 ... }" (each element is followed by a space).
template <typename T> std::ostream& operator<<(std::ostream& os, std::vector<T> const& v) {
    os << "{";
    for (auto it = v.begin(); it != v.end(); ++it) {
        os << *it << " ";
    }
    os << "}";
    return os;
}

}  // namespace ast
namespace parser {
    /// Grammar for a function definition of the form
    ///
    ///     def name(arg1 type1, arg2 type2, ...) return_type:
    ///
    /// It synthesizes an ast::FunctionDef and skips ASCII whitespace between
    /// tokens. All productions are rules owned by this one grammar object, so
    /// no rule refers to a sub-parser that has already been destroyed.
    /// At least one argument is required, because `%` matches one or more items.
    ///
    /// @tparam Iterator  Iterator type of the input being parsed.
    template <typename Iterator>
    struct MyGrammarImpl : qi::grammar<Iterator, ast::FunctionDef(), ascii::space_type> {
        MyGrammarImpl() : MyGrammarImpl::base_type(functionDef)
        {
            // A char_ set definition takes bare ranges: square brackets are NOT
            // set delimiters, they would be ordinary members of the set. Writing
            // "[a-zA-Z_]" would therefore accept '[' and ']' inside identifiers.
            identifier      = qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z_0-9");

            typedIdentifier = identifier >> type;
            functionDef     = "def" >> identifier >> '(' >> (typedIdentifier % ",") >> ')' >> type >> ":";
            type            = type_;

            BOOST_SPIRIT_DEBUG_NODES((identifier)(typedIdentifier)(type)(functionDef))
        }
      private:
        qi::rule<Iterator, ast::TypedIdentifier(), ascii::space_type> typedIdentifier;
        qi::rule<Iterator, ast::FunctionDef(),     ascii::space_type> functionDef;
        qi::rule<Iterator, ast::Type(),            ascii::space_type> type;

        // lexemes: declared without a skipper, so no whitespace is skipped
        // between the characters of an identifier
        qi::rule<Iterator, ast::Identifier()> identifier;

        /// Symbol table mapping the type keywords to their ast::Type value.
        struct Type : qi::symbols<char, ast::Type> {
            Type() {
                add("int32", ast::INT32)
                   ("float32", ast::FLOAT32)
                   ("bool", ast::BOOL)
                   ;
            }
        };

        // Held as a member so that it lives as long as the `type` rule that
        // refers to it.
        Type type_;
    };

    /// The grammar instantiated for parsing a `std::string const`.
    using MyGrammar = MyGrammarImpl<std::string::const_iterator>;
}
// Parses one sample function definition, prints whether parsing succeeded,
// prints the resulting AST on success, and reports any input left unparsed.
int main() {
    std::string const foo("def bar(foo int32 ) bool:");
    auto begin = foo.begin();
    auto end = foo.end();

    ast::FunctionDef def;
    bool result = qi::phrase_parse(begin, end, parser::MyGrammar(), ascii::space, def);

    std::cout << "Parse " << (result ? "successful " : "failed ") << '\n';
    if (result)
        std::cout << def << "\n";

    // phrase_parse reports success as soon as the grammar matches a prefix of
    // the input; `begin` is left where parsing stopped. Make leftover input
    // visible instead of silently ignoring it.
    if (begin != end)
        std::cout << "Remaining unparsed input: '" << std::string(begin, end) << "'\n";
}
打印:
Parse successful
(bar {(foo INT32) } BOOL)
有调试信息:
<functionDef>
<try>def bar(foo int32 ) </try>
<identifier>
<try>bar(foo int32 ) bool</try>
<success>(foo int32 ) bool:</success>
<attributes>[[b, a, r]]</attributes>
</identifier>
<typedIdentifier>
<try>foo int32 ) bool:</try>
<identifier>
<try>foo int32 ) bool:</try>
<success> int32 ) bool:</success>
<attributes>[[f, o, o]]</attributes>
</identifier>
<type>
<try> int32 ) bool:</try>
<success> ) bool:</success>
<attributes>[INT32]</attributes>
</type>
<success> ) bool:</success>
<attributes>[[[f, o, o], INT32]]</attributes>
</typedIdentifier>
<type>
<try> bool:</try>
<success>:</success>
<attributes>[BOOL]</attributes>
</type>
<success></success>
<attributes>[[[b, a, r], [[[f, o, o], INT32]], BOOL]]</attributes>
</functionDef>
¹ Internal Boost::Spirit code segfaults when parsing a composite grammar
我正在尝试使用 Boost::Spirit 编写一个语言解析器。我阅读了教程,并尝试用下面的代码来解析具有如下语法的函数定义:def myfunc(arg1 type1, arg2 type2 ...) return_type:
AST:
namespace ast {

/// Value types that can appear in a declaration.
enum Type {
    BOOL,
    INT32,
    FLOAT32
};

/// A bare name (function name or argument name).
using Identifier = std::string;

/// A name together with its declared type.
using TypedIdentifier = std::tuple<Identifier, Type>;

/// The argument list of a function, in declaration order.
using ArgList = std::vector<TypedIdentifier>;

/// A function definition: (name, arguments, return type).
using FunctionDef = std::tuple<Identifier, ArgList, Type>;

}  // namespace ast
解析器:
// Parsers from the question: one qi::grammar per production.
// NOTE: this is the version that crashes. The composite grammars below build
// their rules from temporary sub-parser objects (see the BUG comments).
namespace parser {
// Grammar producing an ast::Identifier: one character from the first set
// followed by zero or more characters from the second set.
// NOTE(review): the rule is declared with the ascii::space skipper, so this is
// not a lexeme; the reworked grammar later in this document declares its
// identifier rule without a skipper.
struct Identifier
: qi::grammar<string::iterator, ast::Identifier(), ascii::space_type> {
Identifier() : Identifier::base_type(start) {
start = qi::char_("[a-zA-Z_]") >> *qi::char_("[a-zA-Z_0-9]");
}
qi::rule<string::iterator, ast::Identifier(), ascii::space_type> start;
};
// Symbol table mapping the type keywords to their ast::Type value.
struct Type : qi::symbols<char, ast::Type> {
Type() {
add("int32", ast::INT32)("float32", ast::FLOAT32)("bool", ast::BOOL);
}
};
// Grammar producing an ast::TypedIdentifier: an identifier followed by a type.
struct TypedIdentifier
: qi::grammar<string::iterator, ast::TypedIdentifier(), ascii::space_type> {
TypedIdentifier() : TypedIdentifier::base_type(start) {
// BUG: Identifier() and Type() are temporaries that are destroyed at the end
// of this statement, while `start` keeps referring to them. Parsing through
// `start` afterwards is what produces the reported segfault.
start = Identifier() >> Type();
}
qi::rule<string::iterator, ast::TypedIdentifier(), ascii::space_type> start;
};
// Grammar producing an ast::FunctionDef:
//   "def" name "(" typed-identifier { "," typed-identifier } ")" type ":"
struct FunctionDef
: qi::grammar<string::iterator, ast::FunctionDef(), ascii::space_type> {
FunctionDef() : FunctionDef::base_type(start) {
// BUG: same problem as in TypedIdentifier: every sub-parser here is a
// temporary that no longer exists once the assignment has finished.
start = "def" >> Identifier() >> "(" >> (TypedIdentifier() % ",") >> ")" >>
Type() >> ":";
}
qi::rule<string::iterator, ast::FunctionDef(), ascii::space_type> start;
};
}
随后,在尝试解析下面这段简单的代码片段时,我遇到了段错误。段错误最初出现在解析函数定义时,但经过调试我发现,仅解析带类型的标识符时就已经会发生段错误。
int main() {
string foo("foo int32");
auto begin = foo.begin();
auto end = foo.end();
ast::TypedIdentifier id;
bool result = qi::phrase_parse(begin, end, parser::TypedIdentifier(),
ascii::space, id);
cout << "Parse " << (result ? "successful " : "failed ") << endl;
return 0;
}
我测试了标识符和类型解析器,它们可以独立运行。 我还尝试定义全局语法而不是实例化新语法,但我也遇到了段错误。 我在这里做错了什么?
链接的答案¹确实显示出了问题(语法规则中有对临时变量的引用)。
我建议您不必为每个产生式都创建一个 grammar<> 实例。相反,把它们作为规则组合到同一个语法中会更高效(也更优雅):
#define BOOST_SPIRIT_DEBUG
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/std_tuple.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/optional/optional_io.hpp>
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace ast {

/// Value types that can appear in a declaration.
enum Type { BOOL, INT32, FLOAT32 };

/// A bare name (function name or argument name).
using Identifier = std::string;
/// A name together with its declared type.
using TypedIdentifier = std::tuple<Identifier, Type>;
/// The argument list of a function, in declaration order.
using ArgList = std::vector<TypedIdentifier>;
/// A function definition: (name, arguments, return type).
using FunctionDef = std::tuple<Identifier, ArgList, Type>;

/// Streams the enumerator name of `v`, or "?" for a value outside the enum.
std::ostream& operator<<(std::ostream& os, Type v) {
    const char* label = "?";
    switch (v) {
        case BOOL:    label = "BOOL";    break;
        case INT32:   label = "INT32";   break;
        case FLOAT32: label = "FLOAT32"; break;
    }
    return os << label;
}

/// Streams a std::tuple by converting it to a fusion vector and streaming that.
template <typename... Ts> std::ostream& operator<<(std::ostream& os, std::tuple<Ts...> const& v) {
    os << boost::fusion::as_vector(v);
    return os;
}

/// Streams a std::vector as "{e1 e2 ... }" (each element is followed by a space).
template <typename T> std::ostream& operator<<(std::ostream& os, std::vector<T> const& v) {
    os << "{";
    for (auto it = v.begin(); it != v.end(); ++it) {
        os << *it << " ";
    }
    os << "}";
    return os;
}

}  // namespace ast
namespace parser {
    /// Grammar for a function definition of the form
    ///
    ///     def name(arg1 type1, arg2 type2, ...) return_type:
    ///
    /// It synthesizes an ast::FunctionDef and skips ASCII whitespace between
    /// tokens. All productions are rules owned by this one grammar object, so
    /// no rule refers to a sub-parser that has already been destroyed.
    /// At least one argument is required, because `%` matches one or more items.
    ///
    /// @tparam Iterator  Iterator type of the input being parsed.
    template <typename Iterator>
    struct MyGrammarImpl : qi::grammar<Iterator, ast::FunctionDef(), ascii::space_type> {
        MyGrammarImpl() : MyGrammarImpl::base_type(functionDef)
        {
            // A char_ set definition takes bare ranges: square brackets are NOT
            // set delimiters, they would be ordinary members of the set. Writing
            // "[a-zA-Z_]" would therefore accept '[' and ']' inside identifiers.
            identifier      = qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z_0-9");

            typedIdentifier = identifier >> type;
            functionDef     = "def" >> identifier >> '(' >> (typedIdentifier % ",") >> ')' >> type >> ":";
            type            = type_;

            BOOST_SPIRIT_DEBUG_NODES((identifier)(typedIdentifier)(type)(functionDef))
        }
      private:
        qi::rule<Iterator, ast::TypedIdentifier(), ascii::space_type> typedIdentifier;
        qi::rule<Iterator, ast::FunctionDef(),     ascii::space_type> functionDef;
        qi::rule<Iterator, ast::Type(),            ascii::space_type> type;

        // lexemes: declared without a skipper, so no whitespace is skipped
        // between the characters of an identifier
        qi::rule<Iterator, ast::Identifier()> identifier;

        /// Symbol table mapping the type keywords to their ast::Type value.
        struct Type : qi::symbols<char, ast::Type> {
            Type() {
                add("int32", ast::INT32)
                   ("float32", ast::FLOAT32)
                   ("bool", ast::BOOL)
                   ;
            }
        };

        // Held as a member so that it lives as long as the `type` rule that
        // refers to it.
        Type type_;
    };

    /// The grammar instantiated for parsing a `std::string const`.
    using MyGrammar = MyGrammarImpl<std::string::const_iterator>;
}
// Parses one sample function definition, prints whether parsing succeeded,
// prints the resulting AST on success, and reports any input left unparsed.
int main() {
    std::string const foo("def bar(foo int32 ) bool:");
    auto begin = foo.begin();
    auto end = foo.end();

    ast::FunctionDef def;
    bool result = qi::phrase_parse(begin, end, parser::MyGrammar(), ascii::space, def);

    std::cout << "Parse " << (result ? "successful " : "failed ") << '\n';
    if (result)
        std::cout << def << "\n";

    // phrase_parse reports success as soon as the grammar matches a prefix of
    // the input; `begin` is left where parsing stopped. Make leftover input
    // visible instead of silently ignoring it.
    if (begin != end)
        std::cout << "Remaining unparsed input: '" << std::string(begin, end) << "'\n";
}
打印:
Parse successful
(bar {(foo INT32) } BOOL)
有调试信息:
<functionDef>
<try>def bar(foo int32 ) </try>
<identifier>
<try>bar(foo int32 ) bool</try>
<success>(foo int32 ) bool:</success>
<attributes>[[b, a, r]]</attributes>
</identifier>
<typedIdentifier>
<try>foo int32 ) bool:</try>
<identifier>
<try>foo int32 ) bool:</try>
<success> int32 ) bool:</success>
<attributes>[[f, o, o]]</attributes>
</identifier>
<type>
<try> int32 ) bool:</try>
<success> ) bool:</success>
<attributes>[INT32]</attributes>
</type>
<success> ) bool:</success>
<attributes>[[[f, o, o], INT32]]</attributes>
</typedIdentifier>
<type>
<try> bool:</try>
<success>:</success>
<attributes>[BOOL]</attributes>
</type>
<success></success>
<attributes>[[[b, a, r], [[[f, o, o], INT32]], BOOL]]</attributes>
</functionDef>
¹ Internal Boost::Spirit code segfaults when parsing a composite grammar