我如何让这个递归规则起作用?
How do I get this recursive rule to work?
我想解析(起初只是识别,并保留原有符号)LaTeX 数学公式。目前,我在处理上标、下标与花括号的组合时遇到了麻烦(例如 a^{bc}
及其各种组合;基本的 a^b
工作得很好)。下面是一个最小示例(尽可能短,同时保持可读性):
#include <iostream>
using std::cout;
#include <string>
using std::string;
#include <boost/spirit/home/x3.hpp>
namespace x3 = boost::spirit::x3;
using x3::space;
using x3::char_;
using x3::lit;
using x3::repeat;
// Rule declarations. Each rule carries a std::string attribute; the string
// literal is the rule's descriptive name.
x3::rule<struct scripts, string> scripts = "super- and subscripts";
x3::rule<struct braced_thing, string> braced_thing = "thing optionaly surrounded by curly braces";
x3::rule<struct superscript, string> superscript = "superscript";
x3::rule<struct subscript, string> subscript = "subscript";
// Main rule: any number of items, each with or without braces.
auto const scripts_def = *braced_thing;
// Second-level rule: optional opening brace, any number of subscripts,
// superscripts or single characters other than '_', '^', '{', '}', then an
// optional closing brace.
// NOTE(review): the opening brace is optional and the first alternative tried
// is `subscript`, whose definition begins with `braced_thing` again. When the
// input does not start with '{', the rule therefore re-enters itself without
// consuming any input (left recursion) -- presumably the cause of the reported
// segfault. Every component is also optional, so the rule can match the empty
// string.
auto const braced_thing_def = -lit('{') >> *(subscript | superscript | repeat(1)[(char_ - "_^{}")]) >> -lit('}');
// Superscript: things of the form a^b where a and b can be surrounded by curly braces.
// Starts with `braced_thing`, which closes the recursive cycle described above.
auto const superscript_def = braced_thing >> '^' >> braced_thing;
// Subscript: things of the form a_b where a and b can be surrounded by curly braces.
// Starts with `braced_thing`, which closes the recursive cycle described above.
auto const subscript_def = braced_thing >> '_' >> braced_thing;
// Bind each rule to its `<name>_def` definition.
BOOST_SPIRIT_DEFINE(scripts)
BOOST_SPIRIT_DEFINE(braced_thing)
BOOST_SPIRIT_DEFINE(superscript)
BOOST_SPIRIT_DEFINE(subscript)
// Parses one sample expression with the `scripts` grammar, skipping whitespace,
// and reports whether the parse failed, matched only a prefix, or consumed the
// whole input.
int main()
{
    const string input = "a^{b_x y}_z {v_x}^{{x^z}_y}";
    string output; // will only contain the characters as the grammar is defined above
    auto first = input.begin();
    auto last = input.end();
    const bool result = x3::phrase_parse(first, last,
                                         scripts,
                                         space,
                                         output);
    // Check for outright failure first: a failed parse generally also leaves
    // `first != last`, so testing the iterators first would misreport a
    // failure as a partial match.
    if(!result)
        std::cout << "parse failed!\n";
    else if(first != last)
        std::cout << "partial match only:\n" << output << '\n';
    else
        std::cout << "parsing succeeded:\n" << output << '\n';
}
问题是,这段代码会发生段错误(我相信原因很明显),而我又想不出别的办法来……好吧,用一种富有表达力的语法把它表达出来。
我还没有看 @cv_and_he 的建议,而是自己对你的语法做了现场调试(live-debugging)。我得出了下面的写法:
// token: one or more consecutive characters other than '_', '^', '{', '}' and whitespace
auto token = lexeme [ +~char_("_^{} \t\r\n") ];
// simple: either a brace-enclosed sequence or a single token
auto simple = '{' >> sequence >> '}' | token;
// expr: one or more `simple` items joined by '_' or '^', with no skipping in between
auto expr = lexeme [ simple % char_("_^") ];
// sequence: one or more `expr` items separated by runs of whitespace
auto sequence_def = expr % +space;
让我得出这个结果的,基本上就是一步一步地重新思考、设想实际的语法应该是什么样子。
It took me two tries to think of the right way to get "a b"
parsing (at first I "hacked" it in as just another subscript operator in char_(" _^"),
but I got the impression that would not lead to the AST you expect; the clue being that you used a skipper for the space).
目前还没有 AST,我们只是用 x3::raw[...] 来"收获"(reap)匹配到的原始字符串。
//#define BOOST_SPIRIT_X3_DEBUG
#include <iostream>
#include <string>
#include <boost/spirit/home/x3.hpp>
namespace x3 = boost::spirit::x3;
// Grammar for LaTeX-like sub/superscript expressions:
//   sequence := expr (whitespace+ expr)*
//   expr     := simple (('_' | '^') simple)*
//   simple   := '{' sequence '}' | token
namespace grammar {
    using namespace x3;

    // Declared before its definition because `simple` refers to it
    // recursively for brace-enclosed groups.
    rule<struct _s> sequence { "sequence" };

    // A brace-enclosed sequence, or a token made of one or more characters
    // other than '_', '^', '{', '}' and whitespace.
    // Uses its own tag type: X3 identifies rules by their ID type, so it must
    // not share `_s` with `sequence`.
    auto simple = rule<struct _simple> {"simple"} = '{' >> sequence >> '}' | lexeme [ +~char_("_^{} \t\r\n") ];

    // One or more `simple` items joined by '_' or '^'; lexeme[] disables
    // skipping between them.
    auto expr = rule<struct _e> {"expr"} = lexeme [ simple % char_("_^") ];

    // One or more expressions separated by runs of whitespace.
    auto sequence_def = expr % +space;

    BOOST_SPIRIT_DEFINE(sequence)
}
// Runs the grammar over a fixed list of sample expressions and prints, for
// each one, the raw text that was matched plus any input left unconsumed.
int main() {
    const char* const samples[] = {
        "a",
        "a^b", "a_b", "a b",
        "{a}^{b}", "{a}_{b}", "{a} {b}",
        "a^{b_x y}",
        "a^{b_x y}_z {v_x}^{{x^z}_y}"
    };

    for (const std::string sample : samples) {
        // Receives the matched source text exposed by x3::raw[].
        std::string matched;

        auto cursor = sample.begin();
        auto end = sample.end();
        bool ok = x3::parse(cursor, end, x3::raw[grammar::sequence], matched);

        if (!ok)
            std::cout << "parse failed!\n";
        else
            std::cout << "Parse success: '" << matched << "'\n";

        // Report whatever the parser did not consume.
        if (end != cursor)
            std::cout << "remaining unparsed: '" << std::string(cursor, end) << "'\n";
    }
}
输出:
Parse success: 'a'
Parse success: 'a^b'
Parse success: 'a_b'
Parse success: 'a b'
Parse success: '{a}^{b}'
Parse success: '{a}_{b}'
Parse success: '{a} {b}'
Parse success: 'a^{b_x y}'
Parse success: 'a^{b_x y}_z {v_x}^{{x^z}_y}'
启用调试信息的输出:
<sequence>
<try>a</try>
<expr>
<try>a</try>
<simple>
<try>a</try>
<success></success>
</simple>
<success></success>
</expr>
<success></success>
</sequence>
Parse success: 'a'
<sequence>
<try>a^b</try>
<expr>
<try>a^b</try>
<simple>
<try>a^b</try>
<success>^b</success>
</simple>
<simple>
<try>b</try>
<success></success>
</simple>
<success></success>
</expr>
<success></success>
</sequence>
Parse success: 'a^b'
<sequence>
<try>a_b</try>
<expr>
<try>a_b</try>
<simple>
<try>a_b</try>
<success>_b</success>
</simple>
<simple>
<try>b</try>
<success></success>
</simple>
<success></success>
</expr>
<success></success>
</sequence>
Parse success: 'a_b'
<sequence>
<try>a b</try>
<expr>
<try>a b</try>
<simple>
<try>a b</try>
<success> b</success>
</simple>
<success> b</success>
</expr>
<expr>
<try>b</try>
<simple>
<try>b</try>
<success></success>
</simple>
<success></success>
</expr>
<success></success>
</sequence>
Parse success: 'a b'
<sequence>
<try>{a}^{b}</try>
<expr>
<try>{a}^{b}</try>
<simple>
<try>{a}^{b}</try>
<sequence>
<try>a}^{b}</try>
<expr>
<try>a}^{b}</try>
<simple>
<try>a}^{b}</try>
<success>}^{b}</success>
</simple>
<success>}^{b}</success>
</expr>
<success>}^{b}</success>
</sequence>
<success>^{b}</success>
</simple>
<simple>
<try>{b}</try>
<sequence>
<try>b}</try>
<expr>
<try>b}</try>
<simple>
<try>b}</try>
<success>}</success>
</simple>
<success>}</success>
</expr>
<success>}</success>
</sequence>
<success></success>
</simple>
<success></success>
</expr>
<success></success>
</sequence>
Parse success: '{a}^{b}'
<sequence>
<try>{a}_{b}</try>
<expr>
<try>{a}_{b}</try>
<simple>
<try>{a}_{b}</try>
<sequence>
<try>a}_{b}</try>
<expr>
<try>a}_{b}</try>
<simple>
<try>a}_{b}</try>
<success>}_{b}</success>
</simple>
<success>}_{b}</success>
</expr>
<success>}_{b}</success>
</sequence>
<success>_{b}</success>
</simple>
<simple>
<try>{b}</try>
<sequence>
<try>b}</try>
<expr>
<try>b}</try>
<simple>
<try>b}</try>
<success>}</success>
</simple>
<success>}</success>
</expr>
<success>}</success>
</sequence>
<success></success>
</simple>
<success></success>
</expr>
<success></success>
</sequence>
Parse success: '{a}_{b}'
<sequence>
<try>{a} {b}</try>
<expr>
<try>{a} {b}</try>
<simple>
<try>{a} {b}</try>
<sequence>
<try>a} {b}</try>
<expr>
<try>a} {b}</try>
<simple>
<try>a} {b}</try>
<success>} {b}</success>
</simple>
<success>} {b}</success>
</expr>
<success>} {b}</success>
</sequence>
<success> {b}</success>
</simple>
<success> {b}</success>
</expr>
<expr>
<try>{b}</try>
<simple>
<try>{b}</try>
<sequence>
<try>b}</try>
<expr>
<try>b}</try>
<simple>
<try>b}</try>
<success>}</success>
</simple>
<success>}</success>
</expr>
<success>}</success>
</sequence>
<success></success>
</simple>
<success></success>
</expr>
<success></success>
</sequence>
Parse success: '{a} {b}'
<sequence>
<try>a^{b_x y}</try>
<expr>
<try>a^{b_x y}</try>
<simple>
<try>a^{b_x y}</try>
<success>^{b_x y}</success>
</simple>
<simple>
<try>{b_x y}</try>
<sequence>
<try>b_x y}</try>
<expr>
<try>b_x y}</try>
<simple>
<try>b_x y}</try>
<success>_x y}</success>
</simple>
<simple>
<try>x y}</try>
<success> y}</success>
</simple>
<success> y}</success>
</expr>
<expr>
<try>y}</try>
<simple>
<try>y}</try>
<success>}</success>
</simple>
<success>}</success>
</expr>
<success>}</success>
</sequence>
<success></success>
</simple>
<success></success>
</expr>
<success></success>
</sequence>
Parse success: 'a^{b_x y}'
<sequence>
<try>a^{b_x y}_z {v_x}^{{</try>
<expr>
<try>a^{b_x y}_z {v_x}^{{</try>
<simple>
<try>a^{b_x y}_z {v_x}^{{</try>
<success>^{b_x y}_z {v_x}^{{x</success>
</simple>
<simple>
<try>{b_x y}_z {v_x}^{{x^</try>
<sequence>
<try>b_x y}_z {v_x}^{{x^z</try>
<expr>
<try>b_x y}_z {v_x}^{{x^z</try>
<simple>
<try>b_x y}_z {v_x}^{{x^z</try>
<success>_x y}_z {v_x}^{{x^z}</success>
</simple>
<simple>
<try>x y}_z {v_x}^{{x^z}_</try>
<success> y}_z {v_x}^{{x^z}_y</success>
</simple>
<success> y}_z {v_x}^{{x^z}_y</success>
</expr>
<expr>
<try>y}_z {v_x}^{{x^z}_y}</try>
<simple>
<try>y}_z {v_x}^{{x^z}_y}</try>
<success>}_z {v_x}^{{x^z}_y}</success>
</simple>
<success>}_z {v_x}^{{x^z}_y}</success>
</expr>
<success>}_z {v_x}^{{x^z}_y}</success>
</sequence>
<success>_z {v_x}^{{x^z}_y}</success>
</simple>
<simple>
<try>z {v_x}^{{x^z}_y}</try>
<success> {v_x}^{{x^z}_y}</success>
</simple>
<success> {v_x}^{{x^z}_y}</success>
</expr>
<expr>
<try>{v_x}^{{x^z}_y}</try>
<simple>
<try>{v_x}^{{x^z}_y}</try>
<sequence>
<try>v_x}^{{x^z}_y}</try>
<expr>
<try>v_x}^{{x^z}_y}</try>
<simple>
<try>v_x}^{{x^z}_y}</try>
<success>_x}^{{x^z}_y}</success>
</simple>
<simple>
<try>x}^{{x^z}_y}</try>
<success>}^{{x^z}_y}</success>
</simple>
<success>}^{{x^z}_y}</success>
</expr>
<success>}^{{x^z}_y}</success>
</sequence>
<success>^{{x^z}_y}</success>
</simple>
<simple>
<try>{{x^z}_y}</try>
<sequence>
<try>{x^z}_y}</try>
<expr>
<try>{x^z}_y}</try>
<simple>
<try>{x^z}_y}</try>
<sequence>
<try>x^z}_y}</try>
<expr>
<try>x^z}_y}</try>
<simple>
<try>x^z}_y}</try>
<success>^z}_y}</success>
</simple>
<simple>
<try>z}_y}</try>
<success>}_y}</success>
</simple>
<success>}_y}</success>
</expr>
<success>}_y}</success>
</sequence>
<success>_y}</success>
</simple>
<simple>
<try>y}</try>
<success>}</success>
</simple>
<success>}</success>
</expr>
<success>}</success>
</sequence>
<success></success>
</simple>
<success></success>
</expr>
<success></success>
</sequence>
Parse success: 'a^{b_x y}_z {v_x}^{{x^z}_y}'
我想解析(起初只是识别,并保留原有符号)LaTeX 数学公式。目前,我在处理上标、下标与花括号的组合时遇到了麻烦(例如 a^{bc}
及其各种组合;基本的 a^b
工作得很好)。下面是一个最小示例(尽可能短,同时保持可读性):
#include <iostream>
using std::cout;
#include <string>
using std::string;
#include <boost/spirit/home/x3.hpp>
namespace x3 = boost::spirit::x3;
using x3::space;
using x3::char_;
using x3::lit;
using x3::repeat;
// Rule declarations. Each rule carries a std::string attribute; the string
// literal is the rule's descriptive name.
x3::rule<struct scripts, string> scripts = "super- and subscripts";
x3::rule<struct braced_thing, string> braced_thing = "thing optionaly surrounded by curly braces";
x3::rule<struct superscript, string> superscript = "superscript";
x3::rule<struct subscript, string> subscript = "subscript";
// Main rule: any number of items, each with or without braces.
auto const scripts_def = *braced_thing;
// Second-level rule: optional opening brace, any number of subscripts,
// superscripts or single characters other than '_', '^', '{', '}', then an
// optional closing brace.
// NOTE(review): the opening brace is optional and the first alternative tried
// is `subscript`, whose definition begins with `braced_thing` again. When the
// input does not start with '{', the rule therefore re-enters itself without
// consuming any input (left recursion) -- presumably the cause of the reported
// segfault. Every component is also optional, so the rule can match the empty
// string.
auto const braced_thing_def = -lit('{') >> *(subscript | superscript | repeat(1)[(char_ - "_^{}")]) >> -lit('}');
// Superscript: things of the form a^b where a and b can be surrounded by curly braces.
// Starts with `braced_thing`, which closes the recursive cycle described above.
auto const superscript_def = braced_thing >> '^' >> braced_thing;
// Subscript: things of the form a_b where a and b can be surrounded by curly braces.
// Starts with `braced_thing`, which closes the recursive cycle described above.
auto const subscript_def = braced_thing >> '_' >> braced_thing;
// Bind each rule to its `<name>_def` definition.
BOOST_SPIRIT_DEFINE(scripts)
BOOST_SPIRIT_DEFINE(braced_thing)
BOOST_SPIRIT_DEFINE(superscript)
BOOST_SPIRIT_DEFINE(subscript)
// Parses one sample expression with the `scripts` grammar, skipping whitespace,
// and reports whether the parse failed, matched only a prefix, or consumed the
// whole input.
int main()
{
    const string input = "a^{b_x y}_z {v_x}^{{x^z}_y}";
    string output; // will only contain the characters as the grammar is defined above
    auto first = input.begin();
    auto last = input.end();
    const bool result = x3::phrase_parse(first, last,
                                         scripts,
                                         space,
                                         output);
    // Check for outright failure first: a failed parse generally also leaves
    // `first != last`, so testing the iterators first would misreport a
    // failure as a partial match.
    if(!result)
        std::cout << "parse failed!\n";
    else if(first != last)
        std::cout << "partial match only:\n" << output << '\n';
    else
        std::cout << "parsing succeeded:\n" << output << '\n';
}
问题是,这段代码会发生段错误(我相信原因很明显),而我又想不出别的办法来……好吧,用一种富有表达力的语法把它表达出来。
我还没有看 @cv_and_he 的建议,而是自己对你的语法做了现场调试(live-debugging)。我得出了下面的写法:
// token: one or more consecutive characters other than '_', '^', '{', '}' and whitespace
auto token = lexeme [ +~char_("_^{} \t\r\n") ];
// simple: either a brace-enclosed sequence or a single token
auto simple = '{' >> sequence >> '}' | token;
// expr: one or more `simple` items joined by '_' or '^', with no skipping in between
auto expr = lexeme [ simple % char_("_^") ];
// sequence: one or more `expr` items separated by runs of whitespace
auto sequence_def = expr % +space;
让我得出这个结果的,基本上就是一步一步地重新思考、设想实际的语法应该是什么样子。
It took me two tries to think of the right way to get
"a b"
parsing (at first I "hacked" it in as just another subscript operator in char_(" _^"),
but I got the impression that would not lead to the AST you expect; the clue being that you used a skipper for the space).
目前还没有 AST,我们只是用 x3::raw[...] 来"收获"(reap)匹配到的原始字符串。
//#define BOOST_SPIRIT_X3_DEBUG
#include <iostream>
#include <string>
#include <boost/spirit/home/x3.hpp>
namespace x3 = boost::spirit::x3;
// Grammar for LaTeX-like sub/superscript expressions:
//   sequence := expr (whitespace+ expr)*
//   expr     := simple (('_' | '^') simple)*
//   simple   := '{' sequence '}' | token
namespace grammar {
    using namespace x3;

    // Declared before its definition because `simple` refers to it
    // recursively for brace-enclosed groups.
    rule<struct _s> sequence { "sequence" };

    // A brace-enclosed sequence, or a token made of one or more characters
    // other than '_', '^', '{', '}' and whitespace.
    // Uses its own tag type: X3 identifies rules by their ID type, so it must
    // not share `_s` with `sequence`.
    auto simple = rule<struct _simple> {"simple"} = '{' >> sequence >> '}' | lexeme [ +~char_("_^{} \t\r\n") ];

    // One or more `simple` items joined by '_' or '^'; lexeme[] disables
    // skipping between them.
    auto expr = rule<struct _e> {"expr"} = lexeme [ simple % char_("_^") ];

    // One or more expressions separated by runs of whitespace.
    auto sequence_def = expr % +space;

    BOOST_SPIRIT_DEFINE(sequence)
}
// Runs the grammar over a fixed list of sample expressions and prints, for
// each one, the raw text that was matched plus any input left unconsumed.
int main() {
    const char* const samples[] = {
        "a",
        "a^b", "a_b", "a b",
        "{a}^{b}", "{a}_{b}", "{a} {b}",
        "a^{b_x y}",
        "a^{b_x y}_z {v_x}^{{x^z}_y}"
    };

    for (const std::string sample : samples) {
        // Receives the matched source text exposed by x3::raw[].
        std::string matched;

        auto cursor = sample.begin();
        auto end = sample.end();
        bool ok = x3::parse(cursor, end, x3::raw[grammar::sequence], matched);

        if (!ok)
            std::cout << "parse failed!\n";
        else
            std::cout << "Parse success: '" << matched << "'\n";

        // Report whatever the parser did not consume.
        if (end != cursor)
            std::cout << "remaining unparsed: '" << std::string(cursor, end) << "'\n";
    }
}
输出:
Parse success: 'a'
Parse success: 'a^b'
Parse success: 'a_b'
Parse success: 'a b'
Parse success: '{a}^{b}'
Parse success: '{a}_{b}'
Parse success: '{a} {b}'
Parse success: 'a^{b_x y}'
Parse success: 'a^{b_x y}_z {v_x}^{{x^z}_y}'
启用调试信息的输出:
<sequence>
<try>a</try>
<expr>
<try>a</try>
<simple>
<try>a</try>
<success></success>
</simple>
<success></success>
</expr>
<success></success>
</sequence>
Parse success: 'a'
<sequence>
<try>a^b</try>
<expr>
<try>a^b</try>
<simple>
<try>a^b</try>
<success>^b</success>
</simple>
<simple>
<try>b</try>
<success></success>
</simple>
<success></success>
</expr>
<success></success>
</sequence>
Parse success: 'a^b'
<sequence>
<try>a_b</try>
<expr>
<try>a_b</try>
<simple>
<try>a_b</try>
<success>_b</success>
</simple>
<simple>
<try>b</try>
<success></success>
</simple>
<success></success>
</expr>
<success></success>
</sequence>
Parse success: 'a_b'
<sequence>
<try>a b</try>
<expr>
<try>a b</try>
<simple>
<try>a b</try>
<success> b</success>
</simple>
<success> b</success>
</expr>
<expr>
<try>b</try>
<simple>
<try>b</try>
<success></success>
</simple>
<success></success>
</expr>
<success></success>
</sequence>
Parse success: 'a b'
<sequence>
<try>{a}^{b}</try>
<expr>
<try>{a}^{b}</try>
<simple>
<try>{a}^{b}</try>
<sequence>
<try>a}^{b}</try>
<expr>
<try>a}^{b}</try>
<simple>
<try>a}^{b}</try>
<success>}^{b}</success>
</simple>
<success>}^{b}</success>
</expr>
<success>}^{b}</success>
</sequence>
<success>^{b}</success>
</simple>
<simple>
<try>{b}</try>
<sequence>
<try>b}</try>
<expr>
<try>b}</try>
<simple>
<try>b}</try>
<success>}</success>
</simple>
<success>}</success>
</expr>
<success>}</success>
</sequence>
<success></success>
</simple>
<success></success>
</expr>
<success></success>
</sequence>
Parse success: '{a}^{b}'
<sequence>
<try>{a}_{b}</try>
<expr>
<try>{a}_{b}</try>
<simple>
<try>{a}_{b}</try>
<sequence>
<try>a}_{b}</try>
<expr>
<try>a}_{b}</try>
<simple>
<try>a}_{b}</try>
<success>}_{b}</success>
</simple>
<success>}_{b}</success>
</expr>
<success>}_{b}</success>
</sequence>
<success>_{b}</success>
</simple>
<simple>
<try>{b}</try>
<sequence>
<try>b}</try>
<expr>
<try>b}</try>
<simple>
<try>b}</try>
<success>}</success>
</simple>
<success>}</success>
</expr>
<success>}</success>
</sequence>
<success></success>
</simple>
<success></success>
</expr>
<success></success>
</sequence>
Parse success: '{a}_{b}'
<sequence>
<try>{a} {b}</try>
<expr>
<try>{a} {b}</try>
<simple>
<try>{a} {b}</try>
<sequence>
<try>a} {b}</try>
<expr>
<try>a} {b}</try>
<simple>
<try>a} {b}</try>
<success>} {b}</success>
</simple>
<success>} {b}</success>
</expr>
<success>} {b}</success>
</sequence>
<success> {b}</success>
</simple>
<success> {b}</success>
</expr>
<expr>
<try>{b}</try>
<simple>
<try>{b}</try>
<sequence>
<try>b}</try>
<expr>
<try>b}</try>
<simple>
<try>b}</try>
<success>}</success>
</simple>
<success>}</success>
</expr>
<success>}</success>
</sequence>
<success></success>
</simple>
<success></success>
</expr>
<success></success>
</sequence>
Parse success: '{a} {b}'
<sequence>
<try>a^{b_x y}</try>
<expr>
<try>a^{b_x y}</try>
<simple>
<try>a^{b_x y}</try>
<success>^{b_x y}</success>
</simple>
<simple>
<try>{b_x y}</try>
<sequence>
<try>b_x y}</try>
<expr>
<try>b_x y}</try>
<simple>
<try>b_x y}</try>
<success>_x y}</success>
</simple>
<simple>
<try>x y}</try>
<success> y}</success>
</simple>
<success> y}</success>
</expr>
<expr>
<try>y}</try>
<simple>
<try>y}</try>
<success>}</success>
</simple>
<success>}</success>
</expr>
<success>}</success>
</sequence>
<success></success>
</simple>
<success></success>
</expr>
<success></success>
</sequence>
Parse success: 'a^{b_x y}'
<sequence>
<try>a^{b_x y}_z {v_x}^{{</try>
<expr>
<try>a^{b_x y}_z {v_x}^{{</try>
<simple>
<try>a^{b_x y}_z {v_x}^{{</try>
<success>^{b_x y}_z {v_x}^{{x</success>
</simple>
<simple>
<try>{b_x y}_z {v_x}^{{x^</try>
<sequence>
<try>b_x y}_z {v_x}^{{x^z</try>
<expr>
<try>b_x y}_z {v_x}^{{x^z</try>
<simple>
<try>b_x y}_z {v_x}^{{x^z</try>
<success>_x y}_z {v_x}^{{x^z}</success>
</simple>
<simple>
<try>x y}_z {v_x}^{{x^z}_</try>
<success> y}_z {v_x}^{{x^z}_y</success>
</simple>
<success> y}_z {v_x}^{{x^z}_y</success>
</expr>
<expr>
<try>y}_z {v_x}^{{x^z}_y}</try>
<simple>
<try>y}_z {v_x}^{{x^z}_y}</try>
<success>}_z {v_x}^{{x^z}_y}</success>
</simple>
<success>}_z {v_x}^{{x^z}_y}</success>
</expr>
<success>}_z {v_x}^{{x^z}_y}</success>
</sequence>
<success>_z {v_x}^{{x^z}_y}</success>
</simple>
<simple>
<try>z {v_x}^{{x^z}_y}</try>
<success> {v_x}^{{x^z}_y}</success>
</simple>
<success> {v_x}^{{x^z}_y}</success>
</expr>
<expr>
<try>{v_x}^{{x^z}_y}</try>
<simple>
<try>{v_x}^{{x^z}_y}</try>
<sequence>
<try>v_x}^{{x^z}_y}</try>
<expr>
<try>v_x}^{{x^z}_y}</try>
<simple>
<try>v_x}^{{x^z}_y}</try>
<success>_x}^{{x^z}_y}</success>
</simple>
<simple>
<try>x}^{{x^z}_y}</try>
<success>}^{{x^z}_y}</success>
</simple>
<success>}^{{x^z}_y}</success>
</expr>
<success>}^{{x^z}_y}</success>
</sequence>
<success>^{{x^z}_y}</success>
</simple>
<simple>
<try>{{x^z}_y}</try>
<sequence>
<try>{x^z}_y}</try>
<expr>
<try>{x^z}_y}</try>
<simple>
<try>{x^z}_y}</try>
<sequence>
<try>x^z}_y}</try>
<expr>
<try>x^z}_y}</try>
<simple>
<try>x^z}_y}</try>
<success>^z}_y}</success>
</simple>
<simple>
<try>z}_y}</try>
<success>}_y}</success>
</simple>
<success>}_y}</success>
</expr>
<success>}_y}</success>
</sequence>
<success>_y}</success>
</simple>
<simple>
<try>y}</try>
<success>}</success>
</simple>
<success>}</success>
</expr>
<success>}</success>
</sequence>
<success></success>
</simple>
<success></success>
</expr>
<success></success>
</sequence>
Parse success: 'a^{b_x y}_z {v_x}^{{x^z}_y}'