无法使用 Boost Spirit X3 解析空的 C++ 结构
Cannot parse an empty C++ struct with Boost Spirit X3
我正在尝试解析 header 文件中定义的 C++ 结构。我开始定义语法,但我遇到了问题。
这是我的代码:
#include <boost/spirit/home/x3.hpp>
// Question's reproduction case: builds an X3 grammar for a header guard that
// may enclose empty `struct { } Name;` declarations and runs it on three
// inputs. Each result is only stored in `r`; nothing is printed.
int main() {
namespace x3 = boost::spirit::x3;
// Parse "#if !defined XXX_X_" or "#ifndef X_X"
auto Ifndef = x3::skip(x3::space)[(x3::lit('#') >> (x3::lit("ifndef") | (x3::lit("if") >> x3::lit("!defined"))))];
// NOTE: the guard name is not wrapped in x3::lexeme[], so the active skipper
// still runs between the characters matched by +(...). Since the skipper
// accepts whitespace including line ends, the name can run on into the
// following words (e.g. "typedef struct"); this is why data03 fails to parse.
auto HeaderGuardFirstRow = Ifndef >> +(x3::alnum | '_');
// Parse "#define XXX_X" or "# define XXX_X"
auto Define = x3::skip(x3::space)[(x3::lit('#') >> x3::lit("define"))];
// Same missing-lexeme problem as HeaderGuardFirstRow.
auto HeaderGuardSecondRow = Define >> +(x3::alnum | '_');
// Parse
// "
// #if !defined XXX_X_
// #define XXX_X
// "
auto HeaderGuardBegin = HeaderGuardFirstRow >> HeaderGuardSecondRow;
// Parse "#endif" or "# endif"
auto HeaderGuardEnd = x3::skip(x3::space)[x3::lit('#') >> (x3::lit("endif"))];
// Parse variable name like "xxx" or "my_var"; lexeme[] keeps the name contiguous.
auto VariableName = x3::lexeme[x3::char_("a-zA-Z_") >> *(x3::alnum | x3::lit("_"))];
// Skipper for C++ comments (nested /* */ are not handled for now)
auto SingleLineComment = "//" >> *(x3::char_ - x3::eol) >> (x3::eol | x3::eoi);
auto BlockComment = "/*" >> *(x3::char_ - "*/") >> "*/";
// Skips line comments, block comments and whitespace.
auto Skipper = SingleLineComment | BlockComment | x3::ascii::space;
// Parse
// "
// typedef struct {
// } MyStruct;
// "
// ERROR: This parse does not work
// The explicit x3::skip(Skipper)[...] wrappers are not needed: the skipper
// passed to phrase_parse is inherited by the surrounding context.
auto StructType = -x3::lit("typedef") >> x3::skip(Skipper)[x3::lit("struct") >> x3::lit('{')] >>
x3::skip(Skipper)[x3::lit('}') >> VariableName >> x3::lit(";")];
// Header grammar. Should parse
// "
// #if !defined XXX_H
// #define XXX_H
// typedef struct {
// } MyStruct;
// #endif
// "
auto grammar = HeaderGuardBegin >> *(StructType) >> HeaderGuardEnd;
// Input 1: header guard only.
std::string data01(R"xx(
#if !defined XXX_H
#define XXX_H
#endif
)xx");
bool r = phrase_parse(
data01.begin(),
data01.end(),
grammar,
Skipper
);
// Input 2: header guard with C++ comments, which the skipper discards.
std::string data02(R"xx(
#if !defined XXX_H
// Single line comment
#define XXX_H
#endif // !XXX_H
)xx");
r = phrase_parse(
data02.begin(),
data02.end(),
grammar,
Skipper
);
// Input 3: header guard enclosing one empty struct.
std::string data03(R"xx(
#if !defined XXX_H
#define XXX_H
typedef struct {
} MyStruct;
#endif
)xx");
// r = false: This parsing does not work.
r = phrase_parse(
data03.begin(),
data03.end(),
grammar,
Skipper
);
return 0;
}
代码中有三个字符串需要解析:第一个只有 header 守卫(include guard),第二个与第一个类似但带有一些 C++ 注释,第三个包含一个空结构体。
解析失败的是最后一个,我不明白为什么。在我为结构体定义的 `StructType` 语法中,我首先检查可选的 `typedef`,然后检查关键字 `struct` 后面是否跟着 `{` 字符,接着查找 `}` 字符,其后是变量名,最后是 `;`。
我不明白错误在哪里。我在解析空结构时做错了什么?
几件事:
- skipper(跳过器)会从外层上下文继承,因此不需要在每个子表达式里重复写 `x3::skip(...)`
- 你没有用 `lexeme[]` 把 header 守卫的各个 "token" 包起来,所以守卫名称会一直匹配下去,把后面的 `typedef` 和 `struct` 也吞掉(即匹配成 `XXX_Htypedefstruct`),因为 `space` 也包含换行符。
你可以简化事情:
#include <iostream>
#include <boost/spirit/home/x3.hpp>
#include <iomanip>
// Runs the simplified header grammar over three sample inputs and reports,
// for each one, whether it parsed and what input (if any) was left over.
int main() {
    namespace x3 = boost::spirit::x3;

    // Skipper: C++ comments (nested /* */ are not handled) plus whitespace.
    auto const LineComment = "//" >> *(x3::char_ - x3::eol) >> (x3::eol | x3::eoi);
    auto const BlockComment = "/*" >> *(x3::char_ - "*/") >> "*/";
    auto const Skipper = LineComment | BlockComment | x3::ascii::space;

    // Token-level parsers. lexeme[] stops the skipper from running between
    // the characters of a single name.
    auto const GuardName = x3::lexeme[+(x3::alnum | '_')];
    auto const Identifier = x3::lexeme[x3::char_("a-zA-Z_") >> *(x3::alnum | x3::lit("_"))];

    // "#if !defined XXX_X_" or "#ifndef X_X", followed by the guard name.
    auto const IfndefLine =
        x3::lit('#') >> (x3::lit("ifndef") | (x3::lit("if") >> x3::lit("!defined"))) >> GuardName;
    // "#define XXX_X" or "# define XXX_X".
    auto const DefineLine = x3::lit('#') >> x3::lit("define") >> GuardName;
    // "#endif" or "# endif".
    auto const EndifLine = x3::lit('#') >> x3::lit("endif");

    // "[typedef] struct { } Name;" -- only empty struct bodies are accepted.
    auto const EmptyStruct = -x3::lit("typedef") >> "struct" >> '{' >> '}' >> Identifier >> ";";

    // Whole header: opening guard, any number of empty structs, closing guard.
    auto const grammar = IfndefLine >> DefineLine >> *EmptyStruct >> EndifLine;

    char const* const samples[] = {
        R"xx(
#if !defined XXX_H
#define XXX_H
#endif
)xx",
        R"xx(
#if !defined XXX_H
// Single line comment
#define XXX_H
#endif // !XXX_H
)xx",
        R"xx(
#if !defined XXX_H
#define XXX_H
typedef struct {
// aloha
} MyStruct;
typedef struct { /* caramba */ } MyOtherStruct
;
#endif
)xx"};

    for (std::string const data : samples) {
        auto cursor = data.begin();
        auto const end = data.end();

        std::cout << "Parsing " << std::quoted(data) << "\n";

        bool const ok = phrase_parse(cursor, end, grammar, Skipper);
        std::cout << (ok ? "Parsed\n" : "Failed to parse\n");

        // Anything the grammar did not consume is shown to aid debugging.
        if (cursor != end) {
            std::cout << "Remaining unparsed: " << std::quoted(std::string(cursor, end)) << "\n";
        }
    }
}
输出:
Parsing "
#if !defined XXX_H
#define XXX_H
#endif
"
Parsed
Parsing "
#if !defined XXX_H
// Single line comment
#define XXX_H
#endif // !XXX_H
"
Parsed
Parsing "
#if !defined XXX_H
#define XXX_H
typedef struct {
// aloha
} MyStruct;
typedef struct { /* caramba */ } MyOtherStruct
;
#endif
"
Parsed
sehe 在挽救您的代码方面做得非常出色。不过我对您想要的东西有自己的理解,所以也发一个回答。我确实从中了解到 skipper(跳过器)可以相当复杂;而我通常改用 `omit` 来跳过内容。
您似乎想要丢弃所有以 `#` 开头的行以及注释,我就把语法精简成了这样。我确信您迟早要把结果解析到某个属性中,所以我先从结构体名称入手,这就是 `dest` 字符串的用途。
只是另一种看待它的方式。
#include <iostream>
#define BOOST_SPIRIT_X3_DEBUG
#include <boost/spirit/home/x3.hpp>
using namespace boost::spirit::x3;
// Matches a '#' followed by any run of letters, '_', '!' and spaces, e.g.
// "#if !defined XXX_H", "#define XXX_H" or "#endif". lexeme[] makes the
// spaces part of the match; omit[] discards the matched text.
auto const def = lit("#") >> omit[lexeme[*char_("a-zA-Z_! ")]];
// C++ comments (nested /* */ are not handled for now); comment text is discarded.
auto const comment = ("//" >> omit[*(char_ - eol)]) | ("/*" >> omit[*(char_ - "*/")] >> "*/");
// Any run of preprocessor lines, comments and whitespace. Used inline in the
// grammar below rather than being passed to phrase_parse as its skipper.
auto const skipper = *(def | comment | space);
// Parse variable name like "xxx" or "my_var".
// lexeme[] keeps the identifier contiguous (the phrase_parse skipper would
// otherwise run between characters, so "My Struct" would be accepted), the
// leading char_ requires at least one character, and digits are allowed
// after the first character.
auto const name = rule<struct name, std::string>("name") = lexeme[char_("a-zA-Z_") >> *char_("a-zA-Z_0-9")];
// "typedef struct { ... } Name;" -- the body is skipped up to the first '}',
// and only the struct name is exposed as an attribute.
auto const struct_rule = lit("typedef") >> "struct" >> '{' >> omit[*(char_-'}')] >> '}' >> name >> ';';
// Whole input: skippable content, an optional struct, more skippable content.
auto const final = skipper >> -struct_rule >> skipper;
// Parses `in` with the `final` grammar, using `space` as the skipper, and
// prints three things: the parse result, any input left unconsumed, and the
// struct name collected into `dest` (empty when no struct was matched).
//
// Takes `char const*` so that string literals can be passed: converting a
// string literal to `char*` is not valid since C++11.
void parse(char const* in)
{
    std::string const str(in);
    auto it = str.begin();
    std::string dest;
    bool const r = phrase_parse(it, str.end(), final, space, dest);
    std::cout << std::boolalpha << "r: " << r << std::endl
              << std::string(it, str.end()) << std::endl
              << "DEST: " << dest << std::endl;
}
// Runs parse() on the three sample headers from the question: a bare header
// guard, a guard with comments, and a guard enclosing an empty struct.
int main()
{
// Sample 1: header guard only.
parse(R"xx(
#if !defined XXX_H
#define XXX_H
#endif
)xx");
// Sample 2: header guard with single-line comments.
parse(R"xx(
#if !defined XXX_H
// Single line comment
#define XXX_H
#endif // !XXX_H
)xx");
// Sample 3: header guard enclosing one empty typedef'd struct.
parse(R"xx(
#if !defined XXX_H
#define XXX_H
typedef struct {
} MyStruct;
#endif
)xx");
return 0;
}
打印:
r: true
DEST:
r: true
DEST:
<name>
<try> MyStruct;\n\t\t#endif\n</try>
<success>;\n\t\t#endif\n\t </success>
<attributes>[M, y, S, t, r, u, c, t]</attributes>
</name>
r: true
DEST: MyStruct
我正在尝试解析 header 文件中定义的 C++ 结构。我开始定义语法,但我遇到了问题。
这是我的代码:
#include <boost/spirit/home/x3.hpp>
// Question's reproduction case: builds an X3 grammar for a header guard that
// may enclose empty `struct { } Name;` declarations and runs it on three
// inputs. Each result is only stored in `r`; nothing is printed.
int main() {
namespace x3 = boost::spirit::x3;
// Parse "#if !defined XXX_X_" or "#ifndef X_X"
auto Ifndef = x3::skip(x3::space)[(x3::lit('#') >> (x3::lit("ifndef") | (x3::lit("if") >> x3::lit("!defined"))))];
// NOTE: the guard name is not wrapped in x3::lexeme[], so the active skipper
// still runs between the characters matched by +(...). Since the skipper
// accepts whitespace including line ends, the name can run on into the
// following words (e.g. "typedef struct"); this is why data03 fails to parse.
auto HeaderGuardFirstRow = Ifndef >> +(x3::alnum | '_');
// Parse "#define XXX_X" or "# define XXX_X"
auto Define = x3::skip(x3::space)[(x3::lit('#') >> x3::lit("define"))];
// Same missing-lexeme problem as HeaderGuardFirstRow.
auto HeaderGuardSecondRow = Define >> +(x3::alnum | '_');
// Parse
// "
// #if !defined XXX_X_
// #define XXX_X
// "
auto HeaderGuardBegin = HeaderGuardFirstRow >> HeaderGuardSecondRow;
// Parse "#endif" or "# endif"
auto HeaderGuardEnd = x3::skip(x3::space)[x3::lit('#') >> (x3::lit("endif"))];
// Parse variable name like "xxx" or "my_var"; lexeme[] keeps the name contiguous.
auto VariableName = x3::lexeme[x3::char_("a-zA-Z_") >> *(x3::alnum | x3::lit("_"))];
// Skipper for C++ comments (nested /* */ are not handled for now)
auto SingleLineComment = "//" >> *(x3::char_ - x3::eol) >> (x3::eol | x3::eoi);
auto BlockComment = "/*" >> *(x3::char_ - "*/") >> "*/";
// Skips line comments, block comments and whitespace.
auto Skipper = SingleLineComment | BlockComment | x3::ascii::space;
// Parse
// "
// typedef struct {
// } MyStruct;
// "
// ERROR: This parse does not work
// The explicit x3::skip(Skipper)[...] wrappers are not needed: the skipper
// passed to phrase_parse is inherited by the surrounding context.
auto StructType = -x3::lit("typedef") >> x3::skip(Skipper)[x3::lit("struct") >> x3::lit('{')] >>
x3::skip(Skipper)[x3::lit('}') >> VariableName >> x3::lit(";")];
// Header grammar. Should parse
// "
// #if !defined XXX_H
// #define XXX_H
// typedef struct {
// } MyStruct;
// #endif
// "
auto grammar = HeaderGuardBegin >> *(StructType) >> HeaderGuardEnd;
// Input 1: header guard only.
std::string data01(R"xx(
#if !defined XXX_H
#define XXX_H
#endif
)xx");
bool r = phrase_parse(
data01.begin(),
data01.end(),
grammar,
Skipper
);
// Input 2: header guard with C++ comments, which the skipper discards.
std::string data02(R"xx(
#if !defined XXX_H
// Single line comment
#define XXX_H
#endif // !XXX_H
)xx");
r = phrase_parse(
data02.begin(),
data02.end(),
grammar,
Skipper
);
// Input 3: header guard enclosing one empty struct.
std::string data03(R"xx(
#if !defined XXX_H
#define XXX_H
typedef struct {
} MyStruct;
#endif
)xx");
// r = false: This parsing does not work.
r = phrase_parse(
data03.begin(),
data03.end(),
grammar,
Skipper
);
return 0;
}
代码中有三个字符串需要解析:第一个只有 header 守卫(include guard),第二个与第一个类似但带有一些 C++ 注释,第三个包含一个空结构体。
解析失败的是最后一个,我不明白为什么。在我为结构体定义的 `StructType` 语法中,我首先检查可选的 `typedef`,然后检查关键字 `struct` 后面是否跟着 `{` 字符,接着查找 `}` 字符,其后是变量名,最后是 `;`。
我不明白错误在哪里。我在解析空结构时做错了什么?
几件事:
- skipper(跳过器)会从外层上下文继承,因此不需要在每个子表达式里重复写 `x3::skip(...)`
- 你没有用 `lexeme[]` 把 header 守卫的各个 "token" 包起来,所以守卫名称会一直匹配下去,把后面的 `typedef` 和 `struct` 也吞掉(即匹配成 `XXX_Htypedefstruct`),因为 `space` 也包含换行符。
你可以简化事情:
#include <iostream>
#include <boost/spirit/home/x3.hpp>
#include <iomanip>
// Runs the simplified header grammar over three sample inputs and reports,
// for each one, whether it parsed and what input (if any) was left over.
int main() {
    namespace x3 = boost::spirit::x3;

    // Skipper: C++ comments (nested /* */ are not handled) plus whitespace.
    auto const LineComment = "//" >> *(x3::char_ - x3::eol) >> (x3::eol | x3::eoi);
    auto const BlockComment = "/*" >> *(x3::char_ - "*/") >> "*/";
    auto const Skipper = LineComment | BlockComment | x3::ascii::space;

    // Token-level parsers. lexeme[] stops the skipper from running between
    // the characters of a single name.
    auto const GuardName = x3::lexeme[+(x3::alnum | '_')];
    auto const Identifier = x3::lexeme[x3::char_("a-zA-Z_") >> *(x3::alnum | x3::lit("_"))];

    // "#if !defined XXX_X_" or "#ifndef X_X", followed by the guard name.
    auto const IfndefLine =
        x3::lit('#') >> (x3::lit("ifndef") | (x3::lit("if") >> x3::lit("!defined"))) >> GuardName;
    // "#define XXX_X" or "# define XXX_X".
    auto const DefineLine = x3::lit('#') >> x3::lit("define") >> GuardName;
    // "#endif" or "# endif".
    auto const EndifLine = x3::lit('#') >> x3::lit("endif");

    // "[typedef] struct { } Name;" -- only empty struct bodies are accepted.
    auto const EmptyStruct = -x3::lit("typedef") >> "struct" >> '{' >> '}' >> Identifier >> ";";

    // Whole header: opening guard, any number of empty structs, closing guard.
    auto const grammar = IfndefLine >> DefineLine >> *EmptyStruct >> EndifLine;

    char const* const samples[] = {
        R"xx(
#if !defined XXX_H
#define XXX_H
#endif
)xx",
        R"xx(
#if !defined XXX_H
// Single line comment
#define XXX_H
#endif // !XXX_H
)xx",
        R"xx(
#if !defined XXX_H
#define XXX_H
typedef struct {
// aloha
} MyStruct;
typedef struct { /* caramba */ } MyOtherStruct
;
#endif
)xx"};

    for (std::string const data : samples) {
        auto cursor = data.begin();
        auto const end = data.end();

        std::cout << "Parsing " << std::quoted(data) << "\n";

        bool const ok = phrase_parse(cursor, end, grammar, Skipper);
        std::cout << (ok ? "Parsed\n" : "Failed to parse\n");

        // Anything the grammar did not consume is shown to aid debugging.
        if (cursor != end) {
            std::cout << "Remaining unparsed: " << std::quoted(std::string(cursor, end)) << "\n";
        }
    }
}
输出:
Parsing "
#if !defined XXX_H
#define XXX_H
#endif
"
Parsed
Parsing "
#if !defined XXX_H
// Single line comment
#define XXX_H
#endif // !XXX_H
"
Parsed
Parsing "
#if !defined XXX_H
#define XXX_H
typedef struct {
// aloha
} MyStruct;
typedef struct { /* caramba */ } MyOtherStruct
;
#endif
"
Parsed
sehe 在挽救您的代码方面做得非常出色。不过我对您想要的东西有自己的理解,所以也发一个回答。我确实从中了解到 skipper(跳过器)可以相当复杂;而我通常改用 `omit` 来跳过内容。
您似乎想要丢弃所有以 `#` 开头的行以及注释,我就把语法精简成了这样。我确信您迟早要把结果解析到某个属性中,所以我先从结构体名称入手,这就是 `dest` 字符串的用途。
只是另一种看待它的方式。
#include <iostream>
#define BOOST_SPIRIT_X3_DEBUG
#include <boost/spirit/home/x3.hpp>
using namespace boost::spirit::x3;
// Matches a '#' followed by any run of letters, '_', '!' and spaces, e.g.
// "#if !defined XXX_H", "#define XXX_H" or "#endif". lexeme[] makes the
// spaces part of the match; omit[] discards the matched text.
auto const def = lit("#") >> omit[lexeme[*char_("a-zA-Z_! ")]];
// C++ comments (nested /* */ are not handled for now); comment text is discarded.
auto const comment = ("//" >> omit[*(char_ - eol)]) | ("/*" >> omit[*(char_ - "*/")] >> "*/");
// Any run of preprocessor lines, comments and whitespace. Used inline in the
// grammar below rather than being passed to phrase_parse as its skipper.
auto const skipper = *(def | comment | space);
// Parse variable name like "xxx" or "my_var".
// lexeme[] keeps the identifier contiguous (the phrase_parse skipper would
// otherwise run between characters, so "My Struct" would be accepted), the
// leading char_ requires at least one character, and digits are allowed
// after the first character.
auto const name = rule<struct name, std::string>("name") = lexeme[char_("a-zA-Z_") >> *char_("a-zA-Z_0-9")];
// "typedef struct { ... } Name;" -- the body is skipped up to the first '}',
// and only the struct name is exposed as an attribute.
auto const struct_rule = lit("typedef") >> "struct" >> '{' >> omit[*(char_-'}')] >> '}' >> name >> ';';
// Whole input: skippable content, an optional struct, more skippable content.
auto const final = skipper >> -struct_rule >> skipper;
// Parses `in` with the `final` grammar, using `space` as the skipper, and
// prints three things: the parse result, any input left unconsumed, and the
// struct name collected into `dest` (empty when no struct was matched).
//
// Takes `char const*` so that string literals can be passed: converting a
// string literal to `char*` is not valid since C++11.
void parse(char const* in)
{
    std::string const str(in);
    auto it = str.begin();
    std::string dest;
    bool const r = phrase_parse(it, str.end(), final, space, dest);
    std::cout << std::boolalpha << "r: " << r << std::endl
              << std::string(it, str.end()) << std::endl
              << "DEST: " << dest << std::endl;
}
// Runs parse() on the three sample headers from the question: a bare header
// guard, a guard with comments, and a guard enclosing an empty struct.
int main()
{
// Sample 1: header guard only.
parse(R"xx(
#if !defined XXX_H
#define XXX_H
#endif
)xx");
// Sample 2: header guard with single-line comments.
parse(R"xx(
#if !defined XXX_H
// Single line comment
#define XXX_H
#endif // !XXX_H
)xx");
// Sample 3: header guard enclosing one empty typedef'd struct.
parse(R"xx(
#if !defined XXX_H
#define XXX_H
typedef struct {
} MyStruct;
#endif
)xx");
return 0;
}
打印:
r: true
DEST:
r: true
DEST:
<name>
<try> MyStruct;\n\t\t#endif\n</try>
<success>;\n\t\t#endif\n\t </success>
<attributes>[M, y, S, t, r, u, c, t]</attributes>
</name>
r: true
DEST: MyStruct