如何使用boost灵气处理gor解析bnf文法的多行规则

How to handle multi-line rules for gor parsing bnf grammar using boost spirit qi

假设我有这样的 BNF 语法

<code>   ::=  <letter><digit> | <letter><digit><code>
<letter> ::= a | b | c | d | e
             | f | g | h | i
<digit>  ::= 0 | 1 | 2 | 3 |
             4

如果您查看 <letter> 规则,它的延续从 | 开始,但 <digit> 规则的延续开始于 | 出现在上一行的结尾。我也不想使用特定符号来表示规则的结尾。

如何查看规则是否结束使用增强灵气执行。

我刚刚浏览了 boost 页面上的教程,想知道我将如何处理这个问题。

Wikipedia

BNF syntax can only represent a rule in one line, whereas in EBNF a terminating character, the semicolon character “;” marks the end of a rule.

所以简单的答案是:输入不是 BNF。

如果您无论如何都想支持它(风险自负 :)),您必须支持它。所以,让我们写一个简单的 BFN 文法,字面上从 Wikipedia BNF

映射
<syntax>         ::= <rule> | <rule> <syntax>
<rule>           ::= <opt-whitespace> "<" <rule-name> ">" <opt-whitespace> "::=" <opt-whitespace> <expression> <line-end>
<opt-whitespace> ::= " " <opt-whitespace> | ""
<expression>     ::= <list> | <list> <opt-whitespace> "|" <opt-whitespace> <expression>
<line-end>       ::= <opt-whitespace> <EOL> | <line-end> <line-end>
<list>           ::= <term> | <term> <opt-whitespace> <list>
<term>           ::= <literal> | "<" <rule-name> ">"
<literal>        ::= '"' <text1> '"' | "'" <text2> "'"
<text1>          ::= "" | <character1> <text1>
<text2>          ::= '' | <character2> <text2>
<character>      ::= <letter> | <digit> | <symbol>
<letter>         ::= "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z" | "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
<digit>          ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
<symbol>         ::=  "|" | " " | "!" | "#" | "$" | "%" | "&" | "(" | ")" | "*" | "+" | "," | "-" | "." | "/" | ":" | ";" | ">" | "=" | "<" | "?" | "@" | "[" | "\" | "]" | "^" | "_" | "`" | "{" | "}" | "~"
<character1>     ::= <character> | "'"
<character2>     ::= <character> | '"'
<rule-name>      ::= <letter> | <rule-name> <rule-char>
<rule-char>      ::= <letter> | <digit> | "-"

它可能看起来像这样:

template <typename Iterator>
struct BNF: qi::grammar<Iterator, Ast::Syntax()> {
    BNF(): BNF::base_type(start) {
        using namespace qi;
        start = skip(blank) [ _rule % +eol ];

        _rule       = _rule_name >> "::=" >> _expression;
        _expression = _list % '|';
        _list       = +_term;
        _term       = _literal | _rule_name;
        _literal    = '"' >> *(_character - '"') >> '"'
                    | "'" >> *(_character - "'") >> "'";
        _character  = alnum | char_("\"'| !#$%&()*+,./:;>=<?@]\^_`{}~[-");
        _rule_name  = '<' >> (alpha >> *(alnum | char_('-'))) >> '>';

        BOOST_SPIRIT_DEBUG_NODES(
            (_rule)(_expression)(_list)(_term)
            (_literal)(_character)
            (_rule_name))
    }

  private:
    qi::rule<Iterator, Ast::Syntax()>     start;
    qi::rule<Iterator, Ast::Rule(),       qi::blank_type> _rule;
    qi::rule<Iterator, Ast::Expression(), qi::blank_type> _expression;
    qi::rule<Iterator, Ast::List(),       qi::blank_type> _list;
    // lexemes
    qi::rule<Iterator, Ast::Term()>       _term;
    qi::rule<Iterator, Ast::Name()>       _rule_name;
    qi::rule<Iterator, std::string()>     _literal;
    qi::rule<Iterator, char()>            _character;
};

现在它将解析您的样本(更正为 BNF):

    std::string const input = R"(<code>   ::=  <letter><digit> | <letter><digit><code>
<letter> ::= "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i"
<digit>  ::= "0" | "1" | "2" | "3" | "4"
    )";

Live On Compiler Explorer

打印:

code ::= {<letter>, <digit>} | {<letter>, <digit>, <code>}
letter ::= {a} | {b} | {c} | {d} | {e} | {f} | {g} | {h} | {i}
digit ::= {0} | {1} | {2} | {3} | {4}
Remaining: "
    "

支持换行规则

最好的方法是不接受它们——因为语法不是为它设计的,不像EBNF.

您可以通过在 skipper 中做一个否定的前瞻来强制这个问题:

_skipper = blank | (eol >> !_rule);
start = skip(_skipper) [ _rule % +eol ];

由于技术原因 (Boost spirit skipper issues) 无法编译,因此我们需要在前瞻中为其提供一个占位符跳过器:

_blank = blank;
_skipper = blank | (eol >> !skip(_blank.alias()) [ _rule ]);
start = skip(_skipper.alias()) [ _rule % +eol ];

现在它解析相同但有各种换行符:

    std::string const input = R"(<code>   ::=  <letter><digit> | <letter><digit><code>
<letter> ::= "a" | "b" | "c" | "d" | "e"
           | "f" | "g" | "h" | "i"
<digit>  ::= "0" | "1" | "2" | "3" |
             "4"
    )";

打印:

code ::= {<letter>, <digit>} | {<letter>, <digit>, <code>}
letter ::= {a} | {b} | {c} | {d} | {e} | {f} | {g} | {h} | {i}   
digit ::= {0} | {1} | {2} | {3} | {4}

完整列表

Compiler Explorer

//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted.hpp>
#include <fmt/ranges.h>
#include <fmt/ostream.h>
#include <iomanip>
namespace qi = boost::spirit::qi;

namespace Ast {
    struct Name : std::string {
        using std::string::string;
        using std::string::operator=;

        friend std::ostream& operator<<(std::ostream& os, Name const& n) {
            return os << '<' << n.c_str() << '>';
        }
    };

    using Term = boost::variant<Name, std::string>;
    using List = std::list<Term>;
    using Expression = std::list<List>;

    struct Rule {
        Name name; // lhs
        Expression rhs;
    };

    using Syntax = std::list<Rule>;
}

BOOST_FUSION_ADAPT_STRUCT(Ast::Rule, name, rhs)

namespace Parser {
    template <typename Iterator>
    struct BNF: qi::grammar<Iterator, Ast::Syntax()> {
        BNF(): BNF::base_type(start) {
            using namespace qi;
            _blank = blank;
            _skipper = blank | (eol >> !skip(_blank.alias()) [ _rule ]);
            start = skip(_skipper.alias()) [ _rule % +eol ];

            _rule       = _rule_name >> "::=" >> _expression;
            _expression = _list % '|';
            _list       = +_term;
            _term       = _literal | _rule_name;
            _literal    = '"' >> *(_character - '"') >> '"'
                        | "'" >> *(_character - "'") >> "'";
            _character  = alnum | char_("\"'| !#$%&()*+,./:;>=<?@]\^_`{}~[-");
            _rule_name  = '<' >> (alpha >> *(alnum | char_('-'))) >> '>';

            BOOST_SPIRIT_DEBUG_NODES(
                (_rule)(_expression)(_list)(_term)
                (_literal)(_character)
                (_rule_name))
        }

      private:
        using Skipper = qi::rule<Iterator>;
        Skipper _skipper, _blank;

        qi::rule<Iterator, Ast::Syntax()>     start;
        qi::rule<Iterator, Ast::Rule(),       Skipper> _rule;
        qi::rule<Iterator, Ast::Expression(), Skipper> _expression;
        qi::rule<Iterator, Ast::List(),       Skipper> _list;
        // lexemes
        qi::rule<Iterator, Ast::Term()>       _term;
        qi::rule<Iterator, Ast::Name()>       _rule_name;
        qi::rule<Iterator, std::string()>     _literal;
        qi::rule<Iterator, char()>            _character;
    };
}

int main() {
    Parser::BNF<std::string::const_iterator> const parser;

    std::string const input = R"(<code>   ::=  <letter><digit> | <letter><digit><code>
<letter> ::= "a" | "b" | "c" | "d" | "e"
           | "f" | "g" | "h" | "i"
<digit>  ::= "0" | "1" | "2" | "3" |
             "4"
    )";

    auto it = input.begin(), itEnd = input.end();

    Ast::Syntax syntax;
    if (parse(it, itEnd, parser, syntax)) {
        for (auto& rule : syntax)
            fmt::print("{} ::= {}\n", rule.name, fmt::join(rule.rhs, " | "));
    } else {
        std::cout << "Failed\n";
    }

    if (it != itEnd)
        std::cout << "Remaining: " << std::quoted(std::string(it, itEnd)) << "\n";
}

还有Live On Coliru(没有libfmt)