使用带有一些自定义 bnf 规则的 Boost Spirit 解析 BNF 语法
Parsing BNF Grammar using Boost Spirit with some custom bnf rules
假设我有如下所示的 BNF 语法。现在,'List' 对应于 '|' 之前的所有项(符号)。但是,我想把每个 'List' 的最后一个数字读取为该 'List' 的属性。
<code> ::= <code> <line> 12 2 | <line> 24 4
<line> ::= <ifte> 13 23 | <loop> 24 34 | <action> 15 3
<ifte> ::= if <cond> {<code>} else {<code>} 12
此外,最后一个数字(列表属性)可以是可选的;我想为了让这更容易,我可能不得不使用一些符号来包围数字以便于解析,例如 <<23>>.
下面的代码可以编译,但无法解析上面的语法:
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted.hpp>
/*#include <fmt/ranges.h>
#include <fmt/ostream.h>*/
#include <iomanip>
// Abstract syntax tree types that the BNF grammar fills in.
namespace AST {
// A rule name: a std::string that streams itself wrapped in angle brackets.
struct Name : std::string {
using std::string::string;
using std::string::operator=;
// Writes the name as "<name>"; the characters are streamed via c_str().
friend std::ostream &operator<<(std::ostream &os, Name const &n) {
return os << '<' << n.c_str() << '>';
}
};
// A single term of a list: either a rule name or a plain string.
using Term = boost::variant<Name, std::string>;
// One alternative of a rule: its terms plus an integer.
struct List {
std::vector<Term> terms;
int number; // the number that closes the alternative (the "list attribute")
};
// All alternatives of one rule.
using Expression = std::vector<List>;
// A complete production: name ::= rhs.
struct Rule {
Name name; // lhs: the name being defined
Expression rhs;
};
// A whole grammar: the sequence of its rules.
using Syntax = std::vector<Rule>;
}
// Adapt the structs as Fusion sequences; members are listed in declaration order.
BOOST_FUSION_ADAPT_STRUCT(AST::List, terms, number)
BOOST_FUSION_ADAPT_STRUCT(AST::Rule, name, rhs)
// Spirit Qi grammar that is meant to parse BNF text into an AST::Syntax.
namespace Parser {
namespace qi = boost::spirit::qi;
template<typename Iterator>
class BNF : public qi::grammar<Iterator, AST::Syntax()> {
public:
BNF() : BNF::base_type(start) {
// Rules are separated by one or more end-of-line characters.
// NOTE(review): `blank` here names the member rule declared below, which
// this constructor never assigns a parser to; qi::blank was presumably
// intended as the skipper - confirm.
start = qi::skip(blank)[rule % +qi::eol];
// '<', a letter, then letters/digits/'-', and '>'. The brackets are matched
// with qi::char_, not with plain character literals.
_rule_name = qi::hold[qi::char_('<') >> (qi::alpha >> *(qi::alnum | qi::char_('-'))) >> qi::char_('>')];
// One or more terms followed by a mandatory unsigned integer.
_list = +term >> qi::uint_;
term = _literal | _rule_name;
// Three alternatives: double-quoted text, single-quoted text, or a bare run of
// graphic characters that excludes '<', '|', '>' and the "::=" token.
_literal = qi::hold['"' >> *(character - '"') >> '"']
| qi::hold["'" >> *(character - "'") >> "'"]
| qi::hold[+(qi::graph - qi::char_("<|>") - "::=")];
// NOTE(review): `\^` is not a standard escape sequence; `\\^` (a backslash
// followed by '^') may have been intended - confirm.
character = qi::alnum | qi::char_("\"'| !#$%&()*+,./:;>=<?@]\^_`{}~[-");
// Alternatives of a rule are separated by '|'.
_expression = _list % '|';
rule = _rule_name >> "::=" >> _expression;
BOOST_SPIRIT_DEBUG_NODES((rule)(_expression)(_list)(term)(_literal)(
character)(_rule_name))
}
private:
// Used as the skipper in `start`; declared as a rule without attribute.
qi::rule<Iterator> blank;
qi::rule<Iterator, AST::Syntax()> start;
// These three rules name qi::rule<Iterator> as their skipper type.
qi::rule<Iterator, AST::Rule(), qi::rule<Iterator>> rule;
qi::rule<Iterator, AST::Expression(), qi::rule<Iterator>> _expression;
qi::rule<Iterator, AST::List(), qi::rule<Iterator>> _list;
// The remaining rules are declared without a skipper type.
qi::rule<Iterator, AST::Term()> term;
qi::rule<Iterator, AST::Name()> _rule_name;
qi::rule<Iterator, std::string()> _literal;
qi::rule<Iterator, char()> character;
};
}
// Only instantiates the grammar; no input is parsed here.
int main() {
Parser::BNF<std::string::const_iterator> const parser;
}
我该如何修改(fix/modify)上面的代码来满足我的需求?
我觉得不清楚你想支持什么输入语法。
例如,
- 当列表属性是可选的时,这是否意味着除了
<code> <line> 12 2
之外,下面这个没有属性的列表也是有效的:<code> <line> 12
?那么您将如何避免把 12
解析为属性?
- 您的输入在 {} 中使用了名称,而您展示的解析器实现显然不支持这一点。您需要支持它吗?如果需要,要如何支持?
让我们一起解决它们
关于第 2 点:修正您的输入
让我们假设您确实不希望 {} 具有神奇的含义,但希望将它们作为语法中的文字。像“if”和“else”一样,它们需要是文字,所以:
<ifte> ::= 'if' <cond> '{' <code> '}' 'else' '{' <code> '}' 23
或
<ifte> ::= "if" <cond> "{" <code> "}" "else" "{" <code> "}" 23
这修复了你的样本:Live On Compiler Explorer:
code ::= <code><line> 34 | <line> 34
line ::= <ifte> 23 | <loop> 34 | <action> 23
ifte ::= if<cond>{<code>}else{<code>} 23
Remaining: "
"
关于第 1 点:可选属性
让我们表达我们的意图:
// The per-list attribute is a plain integer.
using ListAttribute = int;
// One alternative of a rule: its terms plus the list attribute.
struct List {
std::list<Term> terms;
ListAttribute attribute;
};
然后在文法中添加一个词位规则(无skipper):
// Rule yielding a ListAttribute; no skipper type appears in its template arguments.
qi::rule<Iterator, Ast::ListAttribute()> _attribute;
然后我们像这样实现:
// Either an unsigned integer wrapped in "<<" and ">>", or - when that does
// not match - the value 0 supplied by qi::attr.
_attribute = lexeme [ "<<" >> qi::uint_ >> ">>" ]
| qi::attr(0);
// A list is one or more terms followed by the attribute.
_list = +_term >> _attribute;
现在它只会将 <<...>> 形式识别为列表属性:
// Sample input: three rules without attributes, three rules with <<n>>
// attributes, and one rule mixing quoted numeric literals with an attribute.
std::string const input =
"<code> ::= <code> <line> | <line>\n"
"<line> ::= <ifte> | <loop> | <action>\n"
"<ifte> ::= 'if' <cond> '{' <code> '}' 'else' '{' <code> '}'\n"
"<code> ::= <code> <line> <<34>> | <line> <<34>>\n"
"<line> ::= <ifte> <<23>> | <loop> <<34>> | <action> <<23>>\n"
"<ifte> ::= 'if' <cond> '{' <code> '}' 'else' '{' <code> '}' <<23>>\n"
// and the disambiguated example from the question
"<code> ::= <code> <line> '34' | <line> '12' <<2>>\n"
;
打印
code ::= <code><line> 0 | <line> 0
line ::= <ifte> 0 | <loop> 0 | <action> 0
ifte ::= if<cond>{<code>}else{<code>} 0
code ::= <code><line> 34 | <line> 34
line ::= <ifte> 23 | <loop> 34 | <action> 23
ifte ::= if<cond>{<code>}else{<code>} 23
code ::= <code><line>34 0 | <line>12 2
Remaining: "
"
Summary/Bonus
我刚刚意识到,您其实不需要区分 12 2
和 12
(缺少属性),因为 12
无论如何都不是有效的输入标记(字面量/名称必须以 <、" 或 '
之一开头),所以这里是简化后的版本:
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted.hpp>
#include <fmt/ranges.h>
#include <fmt/ostream.h>
#include <iomanip>
namespace qi = boost::spirit::qi;
namespace Ast {
struct Name : std::string {
using std::string::string;
using std::string::operator=;
friend std::ostream& operator<<(std::ostream& os, Name const& n) {
return os << '<' << n.c_str() << '>';
}
};
using Term = boost::variant<Name, std::string>;
using ListAttribute = int;
struct List {
std::list<Term> terms;
ListAttribute attribute;
friend std::ostream& operator<<(std::ostream& os, List const& l) {
for (auto& t : l.terms)
os << t;
return os << " " << l.attribute;
}
};
using Expression = std::list<List>;
struct Rule {
Name name; // lhs
Expression rhs;
};
using Syntax = std::list<Rule>;
}
BOOST_FUSION_ADAPT_STRUCT(Ast::List, terms, attribute)
BOOST_FUSION_ADAPT_STRUCT(Ast::Rule, name, rhs)
namespace Parser {
    /// Spirit Qi grammar that parses BNF-style text into an Ast::Syntax.
    ///
    /// Rules are separated by one or more end-of-line characters. Each rule is
    /// `<name> ::= list | list | ...`, where a list is one or more terms
    /// (quoted literals or `<names>`) optionally followed by an unsigned
    /// integer attribute; a missing attribute is stored as 0.
    template <typename Iterator>
    struct BNF: qi::grammar<Iterator, Ast::Syntax()> {
        BNF(): BNF::base_type(start) {
            using namespace qi;

            // Blanks are skipped inside the rules declared with qi::blank_type.
            start       = skip(blank) [ _rule % +eol ];

            _rule       = _rule_name >> "::=" >> _expression;
            _expression = _list % '|';

            // The attribute is optional: fall back to 0 when no number follows.
            _attribute  = uint_ | qi::attr(0);
            _list       = +_term >> _attribute;

            _term       = _literal | _rule_name ;
            // A literal is delimited by matching double or single quotes.
            _literal    = '"' >> *(_character - '"') >> '"'
                        | "'" >> *(_character - "'") >> "'";
            // The backslash has to be spelled `\\`: `\^` is not a standard
            // escape sequence and would leave the backslash out of the set.
            _character  = alnum | char_("\"'| !#$%&()*+,./:;>=<?@]\\^_`{}~[-");
            // The angle brackets are plain literals here, so they are not part
            // of the synthesized name.
            _rule_name  = '<' >> (alpha >> *(alnum | char_('-'))) >> '>';

            BOOST_SPIRIT_DEBUG_NODES(
                (_rule)(_expression)(_list)(_attribute)(_term)
                (_literal)(_character)
                (_rule_name))
        }

      private:
        qi::rule<Iterator, Ast::Syntax()>     start;
        // Rules that take the blank skipper.
        qi::rule<Iterator, Ast::Rule(),       qi::blank_type> _rule;
        qi::rule<Iterator, Ast::Expression(), qi::blank_type> _expression;
        qi::rule<Iterator, Ast::List(),       qi::blank_type> _list;
        // lexemes
        qi::rule<Iterator, Ast::ListAttribute()> _attribute;
        qi::rule<Iterator, Ast::Term()>          _term;
        qi::rule<Iterator, Ast::Name()>          _rule_name;
        qi::rule<Iterator, std::string()>        _literal;
        qi::rule<Iterator, char()>               _character;
    };
}
// Parses a fixed sample grammar, prints every rule that was parsed, and
// reports any input the parser left unconsumed.
int main() {
    Parser::BNF<std::string::const_iterator> const parser;

    std::string const input =
        "<code> ::= <code> <line> | <line>\n"
        "<line> ::= <ifte> | <loop> | <action>\n"
        "<ifte> ::= 'if' <cond> '{' <code> '}' 'else' '{' <code> '}'\n"
        "<code> ::= <code> <line> 34 | <line> 34\n"
        "<line> ::= <ifte> 23 | <loop> 34 | <action> 23\n"
        "<ifte> ::= 'if' <cond> '{' <code> '}' 'else' '{' <code> '}' 23\n"
        // and the disambiguated example from the question
        "<code> ::= <code> <line> '34' | <line> '12' 2\n"
        ;

    std::string::const_iterator cursor = input.begin();
    std::string::const_iterator const last = input.end();

    Ast::Syntax syntax;
    bool const ok = parse(cursor, last, parser, syntax);

    if (!ok) {
        std::cout << "Failed\n";
    } else {
        for (auto& rule : syntax) {
            fmt::print("{} ::= {}\n", rule.name, fmt::join(rule.rhs, " | "));
        }
    }

    // `cursor` was advanced by parse(); anything behind it was not consumed.
    if (cursor != last) {
        std::cout << "Remaining: " << std::quoted(std::string(cursor, last)) << "\n";
    }
}
打印
code ::= <code><line> 0 | <line> 0
line ::= <ifte> 0 | <loop> 0 | <action> 0
ifte ::= if<cond>{<code>}else{<code>} 0
code ::= <code><line> 34 | <line> 34
line ::= <ifte> 23 | <loop> 34 | <action> 23
ifte ::= if<cond>{<code>}else{<code>} 23
code ::= <code><line>34 0 | <line>12 2
Remaining: "
"
假设我有如下所示的 BNF 语法。现在,'List' 对应于 '|' 之前的所有项(符号)。但是,我想把每个 'List' 的最后一个数字读取为该 'List' 的属性。
<code> ::= <code> <line> 12 2 | <line> 24 4
<line> ::= <ifte> 13 23 | <loop> 24 34 | <action> 15 3
<ifte> ::= if <cond> {<code>} else {<code>} 12
此外,最后一个数字(列表属性)可以是可选的;我想为了让这更容易,我可能不得不使用一些符号来包围数字以便于解析,例如 <<23>>.
下面的代码可以编译,但无法解析上面的语法:
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted.hpp>
/*#include <fmt/ranges.h>
#include <fmt/ostream.h>*/
#include <iomanip>
// Abstract syntax tree types that the BNF grammar fills in.
namespace AST {
// A rule name: a std::string that streams itself wrapped in angle brackets.
struct Name : std::string {
using std::string::string;
using std::string::operator=;
// Writes the name as "<name>"; the characters are streamed via c_str().
friend std::ostream &operator<<(std::ostream &os, Name const &n) {
return os << '<' << n.c_str() << '>';
}
};
// A single term of a list: either a rule name or a plain string.
using Term = boost::variant<Name, std::string>;
// One alternative of a rule: its terms plus an integer.
struct List {
std::vector<Term> terms;
int number; // the number that closes the alternative (the "list attribute")
};
// All alternatives of one rule.
using Expression = std::vector<List>;
// A complete production: name ::= rhs.
struct Rule {
Name name; // lhs: the name being defined
Expression rhs;
};
// A whole grammar: the sequence of its rules.
using Syntax = std::vector<Rule>;
}
// Adapt the structs as Fusion sequences; members are listed in declaration order.
BOOST_FUSION_ADAPT_STRUCT(AST::List, terms, number)
BOOST_FUSION_ADAPT_STRUCT(AST::Rule, name, rhs)
// Spirit Qi grammar that is meant to parse BNF text into an AST::Syntax.
namespace Parser {
namespace qi = boost::spirit::qi;
template<typename Iterator>
class BNF : public qi::grammar<Iterator, AST::Syntax()> {
public:
BNF() : BNF::base_type(start) {
// Rules are separated by one or more end-of-line characters.
// NOTE(review): `blank` here names the member rule declared below, which
// this constructor never assigns a parser to; qi::blank was presumably
// intended as the skipper - confirm.
start = qi::skip(blank)[rule % +qi::eol];
// '<', a letter, then letters/digits/'-', and '>'. The brackets are matched
// with qi::char_, not with plain character literals.
_rule_name = qi::hold[qi::char_('<') >> (qi::alpha >> *(qi::alnum | qi::char_('-'))) >> qi::char_('>')];
// One or more terms followed by a mandatory unsigned integer.
_list = +term >> qi::uint_;
term = _literal | _rule_name;
// Three alternatives: double-quoted text, single-quoted text, or a bare run of
// graphic characters that excludes '<', '|', '>' and the "::=" token.
_literal = qi::hold['"' >> *(character - '"') >> '"']
| qi::hold["'" >> *(character - "'") >> "'"]
| qi::hold[+(qi::graph - qi::char_("<|>") - "::=")];
// NOTE(review): `\^` is not a standard escape sequence; `\\^` (a backslash
// followed by '^') may have been intended - confirm.
character = qi::alnum | qi::char_("\"'| !#$%&()*+,./:;>=<?@]\^_`{}~[-");
// Alternatives of a rule are separated by '|'.
_expression = _list % '|';
rule = _rule_name >> "::=" >> _expression;
BOOST_SPIRIT_DEBUG_NODES((rule)(_expression)(_list)(term)(_literal)(
character)(_rule_name))
}
private:
// Used as the skipper in `start`; declared as a rule without attribute.
qi::rule<Iterator> blank;
qi::rule<Iterator, AST::Syntax()> start;
// These three rules name qi::rule<Iterator> as their skipper type.
qi::rule<Iterator, AST::Rule(), qi::rule<Iterator>> rule;
qi::rule<Iterator, AST::Expression(), qi::rule<Iterator>> _expression;
qi::rule<Iterator, AST::List(), qi::rule<Iterator>> _list;
// The remaining rules are declared without a skipper type.
qi::rule<Iterator, AST::Term()> term;
qi::rule<Iterator, AST::Name()> _rule_name;
qi::rule<Iterator, std::string()> _literal;
qi::rule<Iterator, char()> character;
};
}
// Only instantiates the grammar; no input is parsed here.
int main() {
Parser::BNF<std::string::const_iterator> const parser;
}
我该如何修改(fix/modify)上面的代码来满足我的需求?
我觉得不清楚你想支持什么输入语法。
例如,
- 当列表属性是可选的时,这是否意味着除了
<code> <line> 12 2
之外,下面这个没有属性的列表也是有效的:<code> <line> 12
?那么您将如何避免把 12
解析为属性?
- 您的输入在 {} 中使用了名称,而您展示的解析器实现显然不支持这一点。您需要支持它吗?如果需要,要如何支持?
让我们一起解决它们
关于第 2 点:修正您的输入
让我们假设您确实不希望 {} 具有神奇的含义,但希望将它们作为语法中的文字。像“if”和“else”一样,它们需要是文字,所以:
<ifte> ::= 'if' <cond> '{' <code> '}' 'else' '{' <code> '}' 23
或
<ifte> ::= "if" <cond> "{" <code> "}" "else" "{" <code> "}" 23
这修复了你的样本:Live On Compiler Explorer:
code ::= <code><line> 34 | <line> 34
line ::= <ifte> 23 | <loop> 34 | <action> 23
ifte ::= if<cond>{<code>}else{<code>} 23
Remaining: "
"
关于第 1 点:可选属性
让我们表达我们的意图:
// The per-list attribute is a plain integer.
using ListAttribute = int;
// One alternative of a rule: its terms plus the list attribute.
struct List {
std::list<Term> terms;
ListAttribute attribute;
};
然后在文法中添加一个词位规则(无skipper):
// Rule yielding a ListAttribute; no skipper type appears in its template arguments.
qi::rule<Iterator, Ast::ListAttribute()> _attribute;
然后我们像这样实现:
// Either an unsigned integer wrapped in "<<" and ">>", or - when that does
// not match - the value 0 supplied by qi::attr.
_attribute = lexeme [ "<<" >> qi::uint_ >> ">>" ]
| qi::attr(0);
// A list is one or more terms followed by the attribute.
_list = +_term >> _attribute;
现在它只会将 <<...>> 形式识别为列表属性:
// Sample input: three rules without attributes, three rules with <<n>>
// attributes, and one rule mixing quoted numeric literals with an attribute.
std::string const input =
"<code> ::= <code> <line> | <line>\n"
"<line> ::= <ifte> | <loop> | <action>\n"
"<ifte> ::= 'if' <cond> '{' <code> '}' 'else' '{' <code> '}'\n"
"<code> ::= <code> <line> <<34>> | <line> <<34>>\n"
"<line> ::= <ifte> <<23>> | <loop> <<34>> | <action> <<23>>\n"
"<ifte> ::= 'if' <cond> '{' <code> '}' 'else' '{' <code> '}' <<23>>\n"
// and the disambiguated example from the question
"<code> ::= <code> <line> '34' | <line> '12' <<2>>\n"
;
打印
code ::= <code><line> 0 | <line> 0
line ::= <ifte> 0 | <loop> 0 | <action> 0
ifte ::= if<cond>{<code>}else{<code>} 0
code ::= <code><line> 34 | <line> 34
line ::= <ifte> 23 | <loop> 34 | <action> 23
ifte ::= if<cond>{<code>}else{<code>} 23
code ::= <code><line>34 0 | <line>12 2
Remaining: "
"
Summary/Bonus
我刚刚意识到,您其实不需要区分 12 2
和 12
(缺少属性),因为 12
无论如何都不是有效的输入标记(字面量/名称必须以 <、" 或 '
之一开头),所以这里是简化后的版本:
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted.hpp>
#include <fmt/ranges.h>
#include <fmt/ostream.h>
#include <iomanip>
namespace qi = boost::spirit::qi;
namespace Ast {
struct Name : std::string {
using std::string::string;
using std::string::operator=;
friend std::ostream& operator<<(std::ostream& os, Name const& n) {
return os << '<' << n.c_str() << '>';
}
};
using Term = boost::variant<Name, std::string>;
using ListAttribute = int;
struct List {
std::list<Term> terms;
ListAttribute attribute;
friend std::ostream& operator<<(std::ostream& os, List const& l) {
for (auto& t : l.terms)
os << t;
return os << " " << l.attribute;
}
};
using Expression = std::list<List>;
struct Rule {
Name name; // lhs
Expression rhs;
};
using Syntax = std::list<Rule>;
}
BOOST_FUSION_ADAPT_STRUCT(Ast::List, terms, attribute)
BOOST_FUSION_ADAPT_STRUCT(Ast::Rule, name, rhs)
namespace Parser {
    /// Spirit Qi grammar that parses BNF-style text into an Ast::Syntax.
    ///
    /// Rules are separated by one or more end-of-line characters. Each rule is
    /// `<name> ::= list | list | ...`, where a list is one or more terms
    /// (quoted literals or `<names>`) optionally followed by an unsigned
    /// integer attribute; a missing attribute is stored as 0.
    template <typename Iterator>
    struct BNF: qi::grammar<Iterator, Ast::Syntax()> {
        BNF(): BNF::base_type(start) {
            using namespace qi;

            // Blanks are skipped inside the rules declared with qi::blank_type.
            start       = skip(blank) [ _rule % +eol ];

            _rule       = _rule_name >> "::=" >> _expression;
            _expression = _list % '|';

            // The attribute is optional: fall back to 0 when no number follows.
            _attribute  = uint_ | qi::attr(0);
            _list       = +_term >> _attribute;

            _term       = _literal | _rule_name ;
            // A literal is delimited by matching double or single quotes.
            _literal    = '"' >> *(_character - '"') >> '"'
                        | "'" >> *(_character - "'") >> "'";
            // The backslash has to be spelled `\\`: `\^` is not a standard
            // escape sequence and would leave the backslash out of the set.
            _character  = alnum | char_("\"'| !#$%&()*+,./:;>=<?@]\\^_`{}~[-");
            // The angle brackets are plain literals here, so they are not part
            // of the synthesized name.
            _rule_name  = '<' >> (alpha >> *(alnum | char_('-'))) >> '>';

            BOOST_SPIRIT_DEBUG_NODES(
                (_rule)(_expression)(_list)(_attribute)(_term)
                (_literal)(_character)
                (_rule_name))
        }

      private:
        qi::rule<Iterator, Ast::Syntax()>     start;
        // Rules that take the blank skipper.
        qi::rule<Iterator, Ast::Rule(),       qi::blank_type> _rule;
        qi::rule<Iterator, Ast::Expression(), qi::blank_type> _expression;
        qi::rule<Iterator, Ast::List(),       qi::blank_type> _list;
        // lexemes
        qi::rule<Iterator, Ast::ListAttribute()> _attribute;
        qi::rule<Iterator, Ast::Term()>          _term;
        qi::rule<Iterator, Ast::Name()>          _rule_name;
        qi::rule<Iterator, std::string()>        _literal;
        qi::rule<Iterator, char()>               _character;
    };
}
// Parses a fixed sample grammar, prints every rule that was parsed, and
// reports any input the parser left unconsumed.
int main() {
    Parser::BNF<std::string::const_iterator> const parser;

    std::string const input =
        "<code> ::= <code> <line> | <line>\n"
        "<line> ::= <ifte> | <loop> | <action>\n"
        "<ifte> ::= 'if' <cond> '{' <code> '}' 'else' '{' <code> '}'\n"
        "<code> ::= <code> <line> 34 | <line> 34\n"
        "<line> ::= <ifte> 23 | <loop> 34 | <action> 23\n"
        "<ifte> ::= 'if' <cond> '{' <code> '}' 'else' '{' <code> '}' 23\n"
        // and the disambiguated example from the question
        "<code> ::= <code> <line> '34' | <line> '12' 2\n"
        ;

    std::string::const_iterator cursor = input.begin();
    std::string::const_iterator const last = input.end();

    Ast::Syntax syntax;
    bool const ok = parse(cursor, last, parser, syntax);

    if (!ok) {
        std::cout << "Failed\n";
    } else {
        for (auto& rule : syntax) {
            fmt::print("{} ::= {}\n", rule.name, fmt::join(rule.rhs, " | "));
        }
    }

    // `cursor` was advanced by parse(); anything behind it was not consumed.
    if (cursor != last) {
        std::cout << "Remaining: " << std::quoted(std::string(cursor, last)) << "\n";
    }
}
打印
code ::= <code><line> 0 | <line> 0
line ::= <ifte> 0 | <loop> 0 | <action> 0
ifte ::= if<cond>{<code>}else{<code>} 0
code ::= <code><line> 34 | <line> 34
line ::= <ifte> 23 | <loop> 34 | <action> 23
ifte ::= if<cond>{<code>}else{<code>} 23
code ::= <code><line>34 0 | <line>12 2
Remaining: "
"