boost spirit x3 的编译性能问题
Compilation performance issues with boost spirit x3
对于我之前提出的问题,这个问题的跟进太迟了。基本上,我有一个相当参数化的解析器,用于一系列用 boost spirit x3 编写的语言。 @sehe 极大地帮助我做到了这一点:
不过,既然我在那里,我有一些奇怪的(对我来说)爆炸式编译时间。我无法 post 原始代码,因此一直在研究 'minimal' 示例并最终找到了至少一个罪魁祸首。这是我的代码:
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <boost/fusion/adapted.hpp>
#include <iomanip>
#include <iostream>
namespace x3 = boost::spirit::x3;
using x3::char_;
using x3::int_;
using x3::lexeme;
struct boring1 { std::string name; int i; };
struct boring2 { std::string name; int i; };
struct sub1 { std::string name; int i; };
struct sub2 { std::string name; int i; };
struct sub3 { std::string name; int i; };
struct sub4 { std::string name; int i; };
struct sub : x3::variant<sub1,sub2,sub3,sub4> {
sub()=default;
sub(const sub&)=default;
sub& operator=(const sub&)=default;
using base_type::base_type;
using base_type::operator=;
};
struct leaf1 { char tag; int i; };
struct leaf2 { char tag; int i, j, k; };
struct leaf3 { std::string name; int i, j; };
struct leaf23 : x3::variant<leaf2, leaf3> {
leaf23(const leaf23&)=default;
leaf23()=default;
leaf23& operator=(const leaf23&)=default;
using base_type::base_type;
using base_type::operator=;
};
template <typename T> struct node1 { std::string name; T leaf; };
template <typename T> struct node2 { std::string name; T leaf; };
template <typename T> struct node3 { std::string name; T leaf; };
template <typename T> struct node4 { std::string name; std::vector<T> leafs; };
template <typename T> struct node5 { std::string name; std::vector<T> leafs; };
template <typename T> struct node6 { std::string name; std::vector<T> leafs; };
template <typename T> struct cont1 { int i; std::string name; std::vector<T> ts; };
template <typename T> struct cont2 { unsigned i; char t; std::string name; std::vector<T> ts; };
template <typename T, typename U> struct cont3 {
std::vector<T> ts; std::string name; std::vector<U> us;
};
template <typename T, typename U> struct cont4 {
std::string name; std::vector<U> us; std::vector<T> ts;
};
template <typename T> struct program { int name; std::vector<T> stmts; };
BOOST_FUSION_ADAPT_STRUCT(boring1, name, i)
BOOST_FUSION_ADAPT_STRUCT(boring2, name, i)
BOOST_FUSION_ADAPT_STRUCT(sub1, name, i)
BOOST_FUSION_ADAPT_STRUCT(sub2, name, i)
BOOST_FUSION_ADAPT_STRUCT(sub3, name, i)
BOOST_FUSION_ADAPT_STRUCT(sub4, name, i)
BOOST_FUSION_ADAPT_STRUCT(leaf1, tag, i)
BOOST_FUSION_ADAPT_STRUCT(leaf2, tag, i, j, k)
BOOST_FUSION_ADAPT_STRUCT(leaf3, name, i, j)
BOOST_FUSION_ADAPT_TPL_STRUCT((T), (node1)(T), name, leaf)
BOOST_FUSION_ADAPT_TPL_STRUCT((T), (node2)(T), name, leaf)
BOOST_FUSION_ADAPT_TPL_STRUCT((T), (node3)(T), name, leaf)
BOOST_FUSION_ADAPT_TPL_STRUCT((T), (node4)(T), name, leafs)
BOOST_FUSION_ADAPT_TPL_STRUCT((T), (node5)(T), name, leafs)
BOOST_FUSION_ADAPT_TPL_STRUCT((T), (node6)(T), name, leafs)
BOOST_FUSION_ADAPT_TPL_STRUCT((T), (cont1)(T), i, name, ts)
BOOST_FUSION_ADAPT_TPL_STRUCT((T), (cont2)(T), i, t, name, ts)
BOOST_FUSION_ADAPT_TPL_STRUCT((T) (U), (cont3)(T)(U), ts, name, us)
BOOST_FUSION_ADAPT_TPL_STRUCT((T) (U), (cont4)(T)(U), name, us, ts)
BOOST_FUSION_ADAPT_TPL_STRUCT((T), (program)(T), name, stmts)
template <typename T> struct as_type {
template <typename Expr>
auto operator[](Expr&& expr) const {
return x3::rule<struct _, T>{"as"} = x3::as_parser(std::forward<Expr>(expr));
}
};
template <typename T> static const as_type<T> as = {};
namespace Generic {
x3::rule<class boring1, boring1> const boring1_p = "boring 1 parser";
auto const boring1_p_def = lexeme[ +x3::alnum ] >> int_;
x3::rule<class boring2, boring2> const boring2_p = "boring 2 parser";
auto const boring2_p_def = lexeme[ +x3::alnum ] >> int_;
BOOST_SPIRIT_DEFINE( boring1_p, boring2_p )
x3::rule<class sub1, sub1> const sub1_p = "sub 1 parser";
auto const sub1_p_def = lexeme[ +x3::alnum ] >> int_;
x3::rule<class sub2, sub2> const sub2_p = "sub 2 parser";
auto const sub2_p_def = lexeme[ +x3::alnum ] >> int_;
x3::rule<class sub3, sub3> const sub3_p = "sub 3 parser";
auto const sub3_p_def = lexeme[ +x3::alnum ] >> int_;
x3::rule<class sub4, sub4> const sub4_p = "sub 4 parser";
auto const sub4_p_def = lexeme[ +x3::alnum ] >> int_;
x3::rule<class sub, sub> const sub_p = "combined sub parser";
auto const sub_p_def = sub1_p | sub2_p | sub3_p | sub4_p;
BOOST_SPIRIT_DEFINE( sub1_p, sub2_p, sub3_p, sub4_p, sub_p )
template <typename T> auto leaf_p = x3::eps(false);
template <typename T> auto leaves() {
return as<std::vector<T>>[ leaf_p<T> % ',' ];
}
template <> auto leaf_p<leaf1> = as<leaf1>[lexeme[ (char_('t') | char_('u')) >> int_ ]];
template <> auto leaf_p<leaf2> = as<leaf2>[lexeme[ char_('t') >> int_ >> "_" >> int_ >> "_" >> int_]];
template <> auto leaf_p<leaf3> = as<leaf3>[lexeme[ (+x3::alnum) >> "_" >> int_ >> "_" >> int_]];
template <> auto leaf_p<leaf23> = as<leaf23>[leaf_p<leaf2> | leaf_p<leaf3>];
template <typename T> auto parseNode1() {
return as<node1<T>>[ (x3::string("xx") | x3::string("yy") | x3::string("zz")) >> leaf_p<T>];
}
template <typename T> auto parseNode2() {
return as<node2<T>>[ x3::string("node1") >> leaf_p<T>];
}
template <typename T> auto parseNode3() {
return as<node3<T>>[ x3::string("node3") >> leaf_p<T>];
}
template <typename T> auto parseNode4() {
return as<node4<T>>[ x3::string("node4") >> leaves<T>()];
}
template <typename T> auto parseNode5() {
return as<node5<T>>[ x3::string("node5") >> leaves<T>()];
}
template <typename T> auto parseNode6() {
return as<node6<T>>[ x3::string("node6") >> leaves<T>()];
}
struct With1 {};
struct With2 {};
x3::rule<class header1,std::string> const header1 = "header 1";
auto const header1_def = lexeme[+x3::alnum][([](auto& ctx) {
std::string v = x3::_attr(ctx);
x3::get<With1>(ctx) = v;
x3::_val(ctx) = v;
})];
x3::rule<class footer1,std::string> const footer1 = "footer 1";
auto const footer1_def = lexeme[+x3::alnum][([](auto& ctx) {
std::string v = x3::get<With1>(ctx);
x3::_pass(ctx) = v == x3::_attr(ctx);
})];
x3::rule<class header2,std::string> const header2 = "header 2";
auto const header2_def = lexeme[+x3::alnum][([](auto& ctx) {
std::string v = x3::_attr(ctx);
x3::get<With2>(ctx) = v;
x3::_val(ctx) = v;
})];
x3::rule<class footer2,std::string> const footer2 = "footer 2";
auto const footer2_def = lexeme[+x3::alnum][([](auto& ctx) {
std::string v = x3::get<With2>(ctx);
x3::_pass(ctx) = v == x3::_attr(ctx);
})];
BOOST_SPIRIT_DEFINE(header1, footer1, header2, footer2);
template <typename T>
auto const parseCont1(x3::rule<class c1, T> const& parser) {
return as<cont1<T>>[ int_ >> x3::string("cont1") >> "{" >> +parser >> "}" ];
}
template <typename T>
auto const parseCont2(x3::rule<class c1, T> const& parser) {
return as<cont2<T>>[ x3::bin >> char_ >> header1 >> "{" >> +parser >> "}" >> x3::omit[footer1] ];
}
template <typename T, typename U>
auto const parseCont3(x3::rule<class c1, U> const& parser) {
return as<cont3<T,U>>[ leaves<T>() >> x3::string("cont3") >> "{" >> +parser >> "}" ];
}
auto vectorize = [](auto &ctx){ _val(ctx) = { _attr(ctx) }; };
template <typename stmt>
auto cont4_simple_body(x3::rule<class c1, stmt> const& parser) {
return as<std::vector<stmt>>[parser[ vectorize ]];
}
/* What follows are three different possible implementations of cont4_body, which
* is used in the parseCont4 rule below.
* - the first parses wrong, but is fast to compile (I don't understand why this is
* the case, but in tests from my real language, it produces empty vectors,
* always
* - the second is correct, but slower (maybe 2x)
* - the third is also correct, but slower than the above (another 2-3x)
*/
// wrong but fast compile: ~64s when just Language1 is enabled below in main
// template <typename stmt>
// auto cont4_body(x3::rule<class c1, stmt> const& parser) {
// return
// as<std::vector<stmt>>
// [ "{" >> +parser >> "}"
// | parser[vectorize]
// ];
// }
// correct, slower compile: ~189s when just Language1 is enabled below in main
// template <typename stmt>
// auto cont4_body(x3::rule<class c1, stmt> const& parser) {
// return
// as<std::vector<stmt>>
// [ "{" >> +parser >> "}"
// | cont4_simple_body(parser)
// ];
// }
// correct, even slower compile: ~424s when just Language1 is enabled below in main
template <typename stmt>
auto cont4_body(x3::rule<class c1, stmt> const& parser) {
return
as<std::vector<stmt>>["{" >> + parser >> "}"]
| as<std::vector<stmt>>[cont4_simple_body(parser)]
;
}
template <typename T, typename U>
auto const parseCont4(x3::rule<class c1, U> const& parser) {
return as<cont4<T,U>>[ header2 >> cont4_body(parser) >> leaves<T>() >> x3::omit[footer2]];
}
}
namespace Language1 {
struct lang1_stmt;
struct lang1_stmt : x3::variant<
boring1,
boring2,
sub,
node1<leaf1>,
node2<leaf1>,
node3<leaf1>,
node4<leaf1>,
node5<leaf1>,
node6<leaf1>,
x3::forward_ast<cont1<lang1_stmt>>,
x3::forward_ast<cont2<lang1_stmt>>,
x3::forward_ast<cont3<leaf1, lang1_stmt>>,
x3::forward_ast<cont4<leaf1, lang1_stmt>>
> {
lang1_stmt(const lang1_stmt&)=default;
lang1_stmt()=default;
lang1_stmt& operator=(const lang1_stmt&)=default;
using base_type::base_type;
using base_type::operator=;
};
x3::rule<class Generic::c1, lang1_stmt> lang1_stmt_p = "lang1 stmt parser";
auto const lang1_stmt_p_def =
Generic::boring1_p
| Generic::boring2_p
| Generic::sub_p
| Generic::parseNode1<leaf1>()
| Generic::parseNode2<leaf1>()
| Generic::parseNode3<leaf1>()
| Generic::parseNode4<leaf1>()
| Generic::parseNode5<leaf1>()
| Generic::parseNode6<leaf1>()
| Generic::parseCont1<lang1_stmt>( lang1_stmt_p )
| Generic::parseCont2<lang1_stmt>( lang1_stmt_p )
| Generic::parseCont3<leaf1, lang1_stmt>( lang1_stmt_p )
| Generic::parseCont4<leaf1, lang1_stmt>( lang1_stmt_p )
;
BOOST_SPIRIT_DEFINE( lang1_stmt_p );
std::string with1;
std::string with2;
auto const stmts
= x3::rule<class grammar_stmts1, std::vector<lang1_stmt>> { "Lang1 statements" }
= x3::with<Generic::With1>(with1)
[x3::with<Generic::With2>(with2)[ +lang1_stmt_p ]];
auto const grammar
= x3::rule<class grammar1, program<lang1_stmt>> { "Lang1 grammar" }
= x3::skip( x3::space )[x3::int_ >> stmts];
}
namespace Language2 {
struct lang2_stmt;
struct lang2_stmt : x3::variant<
boring1,
boring2,
sub,
node1<leaf23>,
node2<leaf23>,
node3<leaf23>,
node4<leaf23>,
node5<leaf23>,
node6<leaf23>,
x3::forward_ast<cont1<lang2_stmt>>,
x3::forward_ast<cont2<lang2_stmt>>,
x3::forward_ast<cont3<leaf23, lang2_stmt>>,
x3::forward_ast<cont4<leaf23, lang2_stmt>>
> {
lang2_stmt(const lang2_stmt&)=default;
lang2_stmt()=default;
lang2_stmt& operator=(const lang2_stmt&)=default;
using base_type::base_type;
using base_type::operator=;
};
x3::rule<class Generic::c1, lang2_stmt> lang2_stmt_p = "lang2 stmt parser";
auto const lang2_stmt_p_def =
Generic::boring1_p
| Generic::boring2_p
| Generic::sub_p
| Generic::parseNode1<leaf23>()
| Generic::parseNode2<leaf23>()
| Generic::parseNode3<leaf23>()
| Generic::parseNode4<leaf23>()
| Generic::parseNode5<leaf23>()
| Generic::parseNode6<leaf23>()
| Generic::parseCont1<lang2_stmt>( lang2_stmt_p )
| Generic::parseCont2<lang2_stmt>( lang2_stmt_p )
| Generic::parseCont3<leaf23, lang2_stmt>( lang2_stmt_p )
| Generic::parseCont4<leaf23, lang2_stmt>( lang2_stmt_p )
;
BOOST_SPIRIT_DEFINE( lang2_stmt_p );
std::string with1;
std::string with2;
auto const stmts
= x3::rule<class grammar_stmts2, std::vector<lang2_stmt>> { "Lang2 statements" }
= x3::with<Generic::With1>(with1)
[x3::with<Generic::With2>(with2)[ +lang2_stmt_p ]];
auto const grammar
= x3::rule<class grammar2, program<lang2_stmt>> { "Lang2 grammar" }
= x3::skip( x3::space )[x3::int_ >> stmts];
}
void test(auto const& grammar, std::string text, auto ast) {
auto f = text.begin(), l = text.end();
std::cout << "\nParsing: " << std::quoted(text, '\'') << "\n";
if (boost::spirit::x3::parse(f, l, grammar, ast)) {
std::cout << "Success!\n";
} else {
std::cout << " -- Failed " << std::quoted(text, '\'') << "\n";
}
}
int main() {
test( Language1::grammar,
"",
program<Language1::lang1_stmt>{});
// test(
// Language2::grammar,
// "",
// program<Language2::lang2_stmt>{});
}
你可以在这个例子中看到我定义了两种语言。它们只是在 ast 的叶子上有所不同,Language2 中语言的目标比 Language1 中的更复杂。我已经复制了我在真实解析器中使用的所有功能,但是为不同的语句类型创建了愚蠢的假解析器。值得注意的是:我看到的性能问题在 Language2 中被大大夸大了。我实际上只在我的主要功能中链接到 Language1 以节省测试时间。
问题似乎出在我对条件语句的编码中,尤其是 cond4。在这里,条件可以有选择地有大括号。如果有 none,则只包含一个语句。请注意我包含的条件主体的解析器的 3 种不同实现。第一个实际上解析不正确(我的意思是,解析成功,但向量始终为空!)。接下来的两个正确解析但大大增加了编译时间。
知道这里发生了什么吗?
当我使用本地定义的“匿名”规则时,爆炸性的编译时间通常会打击我;我的意思是 不是 通过类型标记参数使用 BOOST_SPRIIT_* 宏与其解析器实现相关联的规则,而是隐式地。
要注意的是,“隐式”规则定义被带入上下文类型,这会导致最坏情况下的模板实例化行为很糟糕,尤其是面对
- 递归规则
- 本地更换船长(
lexeme[]
。skip[]
,noskip[]
)
- 任何其他上下文修饰指令(
x3::with<Tag>(expr) [p]
或任何基于它的构建指令)
因此,原则上避免爆炸的一个好方法是努力将规则实例与声明站点分开(使用宏),该过程通常称为“编译防火墙”- 因为它包含单独翻译单元中的实现细节。
这种方法与完全按功能生成解析器的想法之间存在轻微的紧张关系,因为原型解析器工厂看起来有点像
auto makeParser(auto arg) {
return
x3::rule<struct tag_type, Attribute> {"aarg"}
= something >> arg >> x3::eps;
}
的确,返回的正是这样一个与其定义紧密耦合的“未命名”规则。
我没有立即看到为什么你不能通过“允许”以这种方式组合大量解析器——接受更高的编译开销——并在设置点(例如,作为递归入口点的高级规则)。请记住,这些点将对上下文进行硬编码 - 包括任何船长 and/or (自定义)指令状态 - 因此它可能会改变行为。我敢打赌,如果发生这种情况,结果会更清晰,更容易维护。
对于我之前提出的问题,这个问题的跟进太迟了。基本上,我有一个相当参数化的解析器,用于一系列用 boost spirit x3 编写的语言。 @sehe 极大地帮助我做到了这一点:
不过,既然我在那里,我有一些奇怪的(对我来说)爆炸式编译时间。我无法 post 原始代码,因此一直在研究 'minimal' 示例并最终找到了至少一个罪魁祸首。这是我的代码:
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <boost/fusion/adapted.hpp>
#include <iomanip>
#include <iostream>
namespace x3 = boost::spirit::x3;
using x3::char_;
using x3::int_;
using x3::lexeme;
struct boring1 { std::string name; int i; };
struct boring2 { std::string name; int i; };
struct sub1 { std::string name; int i; };
struct sub2 { std::string name; int i; };
struct sub3 { std::string name; int i; };
struct sub4 { std::string name; int i; };
struct sub : x3::variant<sub1,sub2,sub3,sub4> {
sub()=default;
sub(const sub&)=default;
sub& operator=(const sub&)=default;
using base_type::base_type;
using base_type::operator=;
};
struct leaf1 { char tag; int i; };
struct leaf2 { char tag; int i, j, k; };
struct leaf3 { std::string name; int i, j; };
struct leaf23 : x3::variant<leaf2, leaf3> {
leaf23(const leaf23&)=default;
leaf23()=default;
leaf23& operator=(const leaf23&)=default;
using base_type::base_type;
using base_type::operator=;
};
template <typename T> struct node1 { std::string name; T leaf; };
template <typename T> struct node2 { std::string name; T leaf; };
template <typename T> struct node3 { std::string name; T leaf; };
template <typename T> struct node4 { std::string name; std::vector<T> leafs; };
template <typename T> struct node5 { std::string name; std::vector<T> leafs; };
template <typename T> struct node6 { std::string name; std::vector<T> leafs; };
template <typename T> struct cont1 { int i; std::string name; std::vector<T> ts; };
template <typename T> struct cont2 { unsigned i; char t; std::string name; std::vector<T> ts; };
template <typename T, typename U> struct cont3 {
std::vector<T> ts; std::string name; std::vector<U> us;
};
template <typename T, typename U> struct cont4 {
std::string name; std::vector<U> us; std::vector<T> ts;
};
template <typename T> struct program { int name; std::vector<T> stmts; };
BOOST_FUSION_ADAPT_STRUCT(boring1, name, i)
BOOST_FUSION_ADAPT_STRUCT(boring2, name, i)
BOOST_FUSION_ADAPT_STRUCT(sub1, name, i)
BOOST_FUSION_ADAPT_STRUCT(sub2, name, i)
BOOST_FUSION_ADAPT_STRUCT(sub3, name, i)
BOOST_FUSION_ADAPT_STRUCT(sub4, name, i)
BOOST_FUSION_ADAPT_STRUCT(leaf1, tag, i)
BOOST_FUSION_ADAPT_STRUCT(leaf2, tag, i, j, k)
BOOST_FUSION_ADAPT_STRUCT(leaf3, name, i, j)
BOOST_FUSION_ADAPT_TPL_STRUCT((T), (node1)(T), name, leaf)
BOOST_FUSION_ADAPT_TPL_STRUCT((T), (node2)(T), name, leaf)
BOOST_FUSION_ADAPT_TPL_STRUCT((T), (node3)(T), name, leaf)
BOOST_FUSION_ADAPT_TPL_STRUCT((T), (node4)(T), name, leafs)
BOOST_FUSION_ADAPT_TPL_STRUCT((T), (node5)(T), name, leafs)
BOOST_FUSION_ADAPT_TPL_STRUCT((T), (node6)(T), name, leafs)
BOOST_FUSION_ADAPT_TPL_STRUCT((T), (cont1)(T), i, name, ts)
BOOST_FUSION_ADAPT_TPL_STRUCT((T), (cont2)(T), i, t, name, ts)
BOOST_FUSION_ADAPT_TPL_STRUCT((T) (U), (cont3)(T)(U), ts, name, us)
BOOST_FUSION_ADAPT_TPL_STRUCT((T) (U), (cont4)(T)(U), name, us, ts)
BOOST_FUSION_ADAPT_TPL_STRUCT((T), (program)(T), name, stmts)
template <typename T> struct as_type {
template <typename Expr>
auto operator[](Expr&& expr) const {
return x3::rule<struct _, T>{"as"} = x3::as_parser(std::forward<Expr>(expr));
}
};
template <typename T> static const as_type<T> as = {};
namespace Generic {
x3::rule<class boring1, boring1> const boring1_p = "boring 1 parser";
auto const boring1_p_def = lexeme[ +x3::alnum ] >> int_;
x3::rule<class boring2, boring2> const boring2_p = "boring 2 parser";
auto const boring2_p_def = lexeme[ +x3::alnum ] >> int_;
BOOST_SPIRIT_DEFINE( boring1_p, boring2_p )
x3::rule<class sub1, sub1> const sub1_p = "sub 1 parser";
auto const sub1_p_def = lexeme[ +x3::alnum ] >> int_;
x3::rule<class sub2, sub2> const sub2_p = "sub 2 parser";
auto const sub2_p_def = lexeme[ +x3::alnum ] >> int_;
x3::rule<class sub3, sub3> const sub3_p = "sub 3 parser";
auto const sub3_p_def = lexeme[ +x3::alnum ] >> int_;
x3::rule<class sub4, sub4> const sub4_p = "sub 4 parser";
auto const sub4_p_def = lexeme[ +x3::alnum ] >> int_;
x3::rule<class sub, sub> const sub_p = "combined sub parser";
auto const sub_p_def = sub1_p | sub2_p | sub3_p | sub4_p;
BOOST_SPIRIT_DEFINE( sub1_p, sub2_p, sub3_p, sub4_p, sub_p )
template <typename T> auto leaf_p = x3::eps(false);
template <typename T> auto leaves() {
return as<std::vector<T>>[ leaf_p<T> % ',' ];
}
template <> auto leaf_p<leaf1> = as<leaf1>[lexeme[ (char_('t') | char_('u')) >> int_ ]];
template <> auto leaf_p<leaf2> = as<leaf2>[lexeme[ char_('t') >> int_ >> "_" >> int_ >> "_" >> int_]];
template <> auto leaf_p<leaf3> = as<leaf3>[lexeme[ (+x3::alnum) >> "_" >> int_ >> "_" >> int_]];
template <> auto leaf_p<leaf23> = as<leaf23>[leaf_p<leaf2> | leaf_p<leaf3>];
template <typename T> auto parseNode1() {
return as<node1<T>>[ (x3::string("xx") | x3::string("yy") | x3::string("zz")) >> leaf_p<T>];
}
template <typename T> auto parseNode2() {
return as<node2<T>>[ x3::string("node1") >> leaf_p<T>];
}
template <typename T> auto parseNode3() {
return as<node3<T>>[ x3::string("node3") >> leaf_p<T>];
}
template <typename T> auto parseNode4() {
return as<node4<T>>[ x3::string("node4") >> leaves<T>()];
}
template <typename T> auto parseNode5() {
return as<node5<T>>[ x3::string("node5") >> leaves<T>()];
}
template <typename T> auto parseNode6() {
return as<node6<T>>[ x3::string("node6") >> leaves<T>()];
}
struct With1 {};
struct With2 {};
x3::rule<class header1,std::string> const header1 = "header 1";
auto const header1_def = lexeme[+x3::alnum][([](auto& ctx) {
std::string v = x3::_attr(ctx);
x3::get<With1>(ctx) = v;
x3::_val(ctx) = v;
})];
x3::rule<class footer1,std::string> const footer1 = "footer 1";
auto const footer1_def = lexeme[+x3::alnum][([](auto& ctx) {
std::string v = x3::get<With1>(ctx);
x3::_pass(ctx) = v == x3::_attr(ctx);
})];
x3::rule<class header2,std::string> const header2 = "header 2";
auto const header2_def = lexeme[+x3::alnum][([](auto& ctx) {
std::string v = x3::_attr(ctx);
x3::get<With2>(ctx) = v;
x3::_val(ctx) = v;
})];
x3::rule<class footer2,std::string> const footer2 = "footer 2";
auto const footer2_def = lexeme[+x3::alnum][([](auto& ctx) {
std::string v = x3::get<With2>(ctx);
x3::_pass(ctx) = v == x3::_attr(ctx);
})];
BOOST_SPIRIT_DEFINE(header1, footer1, header2, footer2);
template <typename T>
auto const parseCont1(x3::rule<class c1, T> const& parser) {
return as<cont1<T>>[ int_ >> x3::string("cont1") >> "{" >> +parser >> "}" ];
}
template <typename T>
auto const parseCont2(x3::rule<class c1, T> const& parser) {
return as<cont2<T>>[ x3::bin >> char_ >> header1 >> "{" >> +parser >> "}" >> x3::omit[footer1] ];
}
template <typename T, typename U>
auto const parseCont3(x3::rule<class c1, U> const& parser) {
return as<cont3<T,U>>[ leaves<T>() >> x3::string("cont3") >> "{" >> +parser >> "}" ];
}
auto vectorize = [](auto &ctx){ _val(ctx) = { _attr(ctx) }; };
template <typename stmt>
auto cont4_simple_body(x3::rule<class c1, stmt> const& parser) {
return as<std::vector<stmt>>[parser[ vectorize ]];
}
/* What follows are three different possible implementations of cont4_body, which
* is used in the parseCont4 rule below.
* - the first parses wrong, but is fast to compile (I don't understand why this is
* the case, but in tests from my real language, it produces empty vectors,
* always
* - the second is correct, but slower (maybe 2x)
* - the third is also correct, but slower than the above (another 2-3x)
*/
// wrong but fast compile: ~64s when just Language1 is enabled below in main
// template <typename stmt>
// auto cont4_body(x3::rule<class c1, stmt> const& parser) {
// return
// as<std::vector<stmt>>
// [ "{" >> +parser >> "}"
// | parser[vectorize]
// ];
// }
// correct, slower compile: ~189s when just Language1 is enabled below in main
// template <typename stmt>
// auto cont4_body(x3::rule<class c1, stmt> const& parser) {
// return
// as<std::vector<stmt>>
// [ "{" >> +parser >> "}"
// | cont4_simple_body(parser)
// ];
// }
// correct, even slower compile: ~424s when just Language1 is enabled below in main
template <typename stmt>
auto cont4_body(x3::rule<class c1, stmt> const& parser) {
return
as<std::vector<stmt>>["{" >> + parser >> "}"]
| as<std::vector<stmt>>[cont4_simple_body(parser)]
;
}
template <typename T, typename U>
auto const parseCont4(x3::rule<class c1, U> const& parser) {
return as<cont4<T,U>>[ header2 >> cont4_body(parser) >> leaves<T>() >> x3::omit[footer2]];
}
}
namespace Language1 {
struct lang1_stmt;
struct lang1_stmt : x3::variant<
boring1,
boring2,
sub,
node1<leaf1>,
node2<leaf1>,
node3<leaf1>,
node4<leaf1>,
node5<leaf1>,
node6<leaf1>,
x3::forward_ast<cont1<lang1_stmt>>,
x3::forward_ast<cont2<lang1_stmt>>,
x3::forward_ast<cont3<leaf1, lang1_stmt>>,
x3::forward_ast<cont4<leaf1, lang1_stmt>>
> {
lang1_stmt(const lang1_stmt&)=default;
lang1_stmt()=default;
lang1_stmt& operator=(const lang1_stmt&)=default;
using base_type::base_type;
using base_type::operator=;
};
x3::rule<class Generic::c1, lang1_stmt> lang1_stmt_p = "lang1 stmt parser";
auto const lang1_stmt_p_def =
Generic::boring1_p
| Generic::boring2_p
| Generic::sub_p
| Generic::parseNode1<leaf1>()
| Generic::parseNode2<leaf1>()
| Generic::parseNode3<leaf1>()
| Generic::parseNode4<leaf1>()
| Generic::parseNode5<leaf1>()
| Generic::parseNode6<leaf1>()
| Generic::parseCont1<lang1_stmt>( lang1_stmt_p )
| Generic::parseCont2<lang1_stmt>( lang1_stmt_p )
| Generic::parseCont3<leaf1, lang1_stmt>( lang1_stmt_p )
| Generic::parseCont4<leaf1, lang1_stmt>( lang1_stmt_p )
;
BOOST_SPIRIT_DEFINE( lang1_stmt_p );
std::string with1;
std::string with2;
auto const stmts
= x3::rule<class grammar_stmts1, std::vector<lang1_stmt>> { "Lang1 statements" }
= x3::with<Generic::With1>(with1)
[x3::with<Generic::With2>(with2)[ +lang1_stmt_p ]];
auto const grammar
= x3::rule<class grammar1, program<lang1_stmt>> { "Lang1 grammar" }
= x3::skip( x3::space )[x3::int_ >> stmts];
}
namespace Language2 {
struct lang2_stmt;
struct lang2_stmt : x3::variant<
boring1,
boring2,
sub,
node1<leaf23>,
node2<leaf23>,
node3<leaf23>,
node4<leaf23>,
node5<leaf23>,
node6<leaf23>,
x3::forward_ast<cont1<lang2_stmt>>,
x3::forward_ast<cont2<lang2_stmt>>,
x3::forward_ast<cont3<leaf23, lang2_stmt>>,
x3::forward_ast<cont4<leaf23, lang2_stmt>>
> {
lang2_stmt(const lang2_stmt&)=default;
lang2_stmt()=default;
lang2_stmt& operator=(const lang2_stmt&)=default;
using base_type::base_type;
using base_type::operator=;
};
x3::rule<class Generic::c1, lang2_stmt> lang2_stmt_p = "lang2 stmt parser";
auto const lang2_stmt_p_def =
Generic::boring1_p
| Generic::boring2_p
| Generic::sub_p
| Generic::parseNode1<leaf23>()
| Generic::parseNode2<leaf23>()
| Generic::parseNode3<leaf23>()
| Generic::parseNode4<leaf23>()
| Generic::parseNode5<leaf23>()
| Generic::parseNode6<leaf23>()
| Generic::parseCont1<lang2_stmt>( lang2_stmt_p )
| Generic::parseCont2<lang2_stmt>( lang2_stmt_p )
| Generic::parseCont3<leaf23, lang2_stmt>( lang2_stmt_p )
| Generic::parseCont4<leaf23, lang2_stmt>( lang2_stmt_p )
;
BOOST_SPIRIT_DEFINE( lang2_stmt_p );
std::string with1;
std::string with2;
auto const stmts
= x3::rule<class grammar_stmts2, std::vector<lang2_stmt>> { "Lang2 statements" }
= x3::with<Generic::With1>(with1)
[x3::with<Generic::With2>(with2)[ +lang2_stmt_p ]];
auto const grammar
= x3::rule<class grammar2, program<lang2_stmt>> { "Lang2 grammar" }
= x3::skip( x3::space )[x3::int_ >> stmts];
}
void test(auto const& grammar, std::string text, auto ast) {
auto f = text.begin(), l = text.end();
std::cout << "\nParsing: " << std::quoted(text, '\'') << "\n";
if (boost::spirit::x3::parse(f, l, grammar, ast)) {
std::cout << "Success!\n";
} else {
std::cout << " -- Failed " << std::quoted(text, '\'') << "\n";
}
}
int main() {
test( Language1::grammar,
"",
program<Language1::lang1_stmt>{});
// test(
// Language2::grammar,
// "",
// program<Language2::lang2_stmt>{});
}
你可以在这个例子中看到我定义了两种语言。它们只是在 ast 的叶子上有所不同,Language2 中语言的目标比 Language1 中的更复杂。我已经复制了我在真实解析器中使用的所有功能,但是为不同的语句类型创建了愚蠢的假解析器。值得注意的是:我看到的性能问题在 Language2 中被大大夸大了。我实际上只在我的主要功能中链接到 Language1 以节省测试时间。
问题似乎出在我对条件语句的编码中,尤其是 cond4。在这里,条件可以有选择地有大括号。如果有 none,则只包含一个语句。请注意我包含的条件主体的解析器的 3 种不同实现。第一个实际上解析不正确(我的意思是,解析成功,但向量始终为空!)。接下来的两个正确解析但大大增加了编译时间。
知道这里发生了什么吗?
当我使用本地定义的“匿名”规则时,爆炸性的编译时间通常会打击我;我的意思是 不是 通过类型标记参数使用 BOOST_SPRIIT_* 宏与其解析器实现相关联的规则,而是隐式地。
要注意的是,“隐式”规则定义被带入上下文类型,这会导致最坏情况下的模板实例化行为很糟糕,尤其是面对
- 递归规则
- 本地更换船长(
lexeme[]
。skip[]
,noskip[]
) - 任何其他上下文修饰指令(
x3::with<Tag>(expr) [p]
或任何基于它的构建指令)
因此,原则上避免爆炸的一个好方法是努力将规则实例与声明站点分开(使用宏),该过程通常称为“编译防火墙”- 因为它包含单独翻译单元中的实现细节。
这种方法与完全按功能生成解析器的想法之间存在轻微的紧张关系,因为原型解析器工厂看起来有点像
auto makeParser(auto arg) {
return
x3::rule<struct tag_type, Attribute> {"aarg"}
= something >> arg >> x3::eps;
}
的确,返回的正是这样一个与其定义紧密耦合的“未命名”规则。
我没有立即看到为什么你不能通过“允许”以这种方式组合大量解析器——接受更高的编译开销——并在设置点(例如,作为递归入口点的高级规则)。请记住,这些点将对上下文进行硬编码 - 包括任何船长 and/or (自定义)指令状态 - 因此它可能会改变行为。我敢打赌,如果发生这种情况,结果会更清晰,更容易维护。