Spirit 在仅出现从 Lexer 获得第一个符号后无法解析

Spirit Fails to Parse After only Appearing to get First symbol From the Lexer

最近在这里问了一个问题:

在这个 post 中有人指出我正在使用的语法是绝对左递归的,这种精神是 PEG 解析器生成器,这意味着左递归是不可能的。

我把文法转换成非左递归文法,使用了龙书左递归部分的规则。

给定一个左递归文法

A -> A >> alpha | beta

可以通过以下方式转换为等价的右递归文法:

A -> beta >> A'

A' -> alpha >> A' | epsilon 

这是生成的解析器,我认为是非左递归产生式:

namespace interpreter {
namespace qi = boost::spirit::qi;
template <typename Iterator, typename Skipper>
struct InterpreterGrammar : qi::grammar<Iterator, Skipper>
{           
    template <typename TokenDef>
    InterpreterGrammar(TokenDef const& tok)
        : InterpreterGrammar::base_type(start)
    {
        using boost::phoenix::ref;

        start %= functionList >> endList >> qi::eoi;

        // different expressions
        exp %= 
               qi::token(k_alphaTerminal) >> qi::token(k_equalTo) >> qi::token(k_alphaTerminal) >> expPrime
               |
               qi::token(k_numericTerminal) >> expPrime
               |
               qi::token(k_trueTok) >> expPrime
               |
               qi::token(k_falseTok) >> expPrime;

        expPrime %=     
               qi::token(k_equalTo) >> exp >> expPrime
               |
               qi::token(k_notEq) >> exp >> expPrime
               |
               qi::token(k_less) >> exp >> expPrime
               |
               qi::token(k_lessEq) >> exp >> expPrime
               |
               qi::token(k_greater) >> exp >> expPrime
               |
               qi::token(k_greaterEq) >> exp >> expPrime
               |
               qi::token(k_andTok) >> exp >> expPrime
               |
               qi::token(k_orTok) >> exp >> expPrime
               |
               qi::token(k_notTok) >> exp 
               |
               qi::token(k_plues) >> exp >> expPrime
               |
               qi::token(k_minus) >> exp >> expPrime
               |
               qi::token(k_mult) >> exp >> expPrime
               |
               qi::token(k_minus) >> exp
               |
               qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
               |
               qi::token(k_alphaTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket) 
               |
               qi::token(k_alphaTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
               |
               qi::token(k_alphaTerminal) >> qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
               | 
               qi::eps;

        // parameter list
        paramList %= exp >> paramListPrime;

        paramListPrime %= qi::token(k_comma) >> exp >> paramListPrime
                          |
                          qi::eps;

        // return statements
        returnStatement %= qi::token(k_returnTok) >> exp
                           |
                           qi::token(k_returnTok);

        // function call statements
        callStatement %= qi::token(k_alphaTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
                         |
                         qi::token(k_alphaTerminal) >> qi::token(k_leftParen) >> paramList >> qi::token(k_rightParen);

        // variable assignment
        assignmentStatement %= qi::token(k_alphaTerminal) >> qi::token(k_assign) >> exp
                               |
                               qi::token(k_alphaTerminal) >> qi::token(k_leftBracket) >> exp
                                   >> qi::token(k_rightBracket) >> qi::token(k_assign) >> exp;

        // list of integers
        intList %= qi::token(k_numericTerminal) >> intListPrime;

        intListPrime %= 
                  qi::token(k_comma) >> qi::token(k_numericTerminal) >> intListPrime
                  |
                  qi::eps;

        // print out a variable
        printStatement %= qi::token(k_print) >> exp;

        // take input
        inputStatement %= qi::token(k_alphaTerminal) >> qi::token(k_input);

        // conditional statement
        conditionStatement %= qi::token(k_ifTok) >> exp >> qi::token(k_colon) >> statements >> optionalElse;

        // consitions have optional else
        optionalElse %= qi::token(k_elseTok) >> qi::token(k_colon) >> statements
                        |
                        qi::eps;

        // while loop
        whileStatement %= qi::token(k_whileTok) >> exp >> qi::token(k_colon) >> statements >> qi::token(k_elihw);

        // actual program statements
        endList %= end >> endListPrime;

        endListPrime %= end >> endListPrime
                        |
                        qi::eps;

        // end possibilities of program in global space
        end %= callStatement
               |
               printStatement
               |
               qi::token(k_alphaTerminal) >> qi::token(k_assign) >> qi::token(k_input)
               |
               qi::token(k_alphaTerminal) >> qi::token(k_assign) >> exp
               |
               qi::token(k_alphaTerminal) >> qi::token(k_assign) >> qi::token(k_leftBracket) >> intList
                   >> qi::token(k_rightBracket)
               |
               qi::token(k_alphaTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket)
                   >> qi::token(k_assign) >> exp;

        // function parameters
        param %=
                qi::token(k_alphaTerminal) >> paramPrime
                |
                qi::token(k_alphaTerminal) >> qi::token(k_leftBracket) >> qi::token(k_rightBracket)
                    >> paramPrime;

        // for handling left recursion in paramlist
        paramPrime %= 
                    qi::token(k_comma) >> qi::token(k_alphaTerminal) >> paramPrime
                    | 
                    qi::eps;


        // define a statement as assignment print input condition while or call
        statement %= 
                    assignmentStatement
                    |
                    printStatement
                    |
                    inputStatement
                    |
                    conditionStatement
                    |
                    whileStatement
                    |
                    callStatement
                    |
                    returnStatement;

        // general statement list
        statements %= statement >> statementsPrime;

        // for handling left recursion in statements
        statementsPrime %= statement >> statementsPrime
                           |
                           qi::eps;

        // functions
        functionList %= qi::token(k_def) >> qi::token(k_alphaTerminal) >> qi::token(k_leftParen)
                            >> param >> qi::token(k_rightParen) >> qi::token(k_colon)
                            >> statements >> qi::token(k_fed)
                        |
                        qi::token(k_def) >> qi::token(k_alphaTerminal) >> qi::token(k_leftParen)
                            >> qi::token(k_rightParen) >> qi::token(k_colon) >> statements >> qi::token(k_fed)
                        | qi::eps;

        BOOST_SPIRIT_DEBUG_NODES((start)(functionList));
        debug(start);
    }
    qi::rule<Iterator, Skipper> start;
    qi::rule<Iterator, Skipper> functionList;
    qi::rule<Iterator, Skipper> endList;
    qi::rule<Iterator, Skipper> endListPrime;
    qi::rule<Iterator, Skipper> param;
    qi::rule<Iterator, Skipper> paramPrime;
    qi::rule<Iterator, Skipper> paramList;
    qi::rule<Iterator, Skipper> paramListPrime;
    qi::rule<Iterator, Skipper> statements;
    qi::rule<Iterator, Skipper> statementsPrime;
    qi::rule<Iterator, Skipper> statement;
    qi::rule<Iterator, Skipper> assignmentStatement;
    qi::rule<Iterator, Skipper> printStatement;
    qi::rule<Iterator, Skipper> inputStatement;
    qi::rule<Iterator, Skipper> conditionStatement;
    qi::rule<Iterator, Skipper> whileStatement;
    qi::rule<Iterator, Skipper> callStatement;
    qi::rule<Iterator, Skipper> returnStatement;
    qi::rule<Iterator, Skipper> exp;
    qi::rule<Iterator, Skipper> expPrime;
    qi::rule<Iterator, Skipper> intList;
    qi::rule<Iterator, Skipper> intListPrime;
    qi::rule<Iterator, Skipper> optionalElse;
    qi::rule<Iterator, Skipper> end;
};

}

这是词法分析器

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_container.hpp>
#include <iostream>
#include <fstream>
#include <streambuf>

#include <boost/bind.hpp>
#include <boost/ref.hpp>

namespace interpreter
{
    namespace lex = boost::spirit::lex;

    enum Tokens
    {
        k_andTok = 1,
        k_def = 2,
        k_elihw = 3,
        k_elseTok = 4,
        k_falseTok = 5,
        k_fed = 6,
        k_fi = 7,
        k_ifTok = 8,
        k_input = 9,
        k_notTok = 10,
        k_orTok = 11,
        k_print = 12,
        k_returnTok = 13,
        k_trueTok = 14,
        k_whileTok = 15,
        k_plues = 16,
        k_minus = 17,
        k_mult = 18,
        k_div = 19,
        k_bang = 20,
        k_equalTo = 21,
        k_greaterEq = 22,
        k_lessEq = 23,
        k_notEq = 24,
        k_less = 25,
        k_greater = 26,
        k_assign = 27,
        k_comma = 28,
        k_colon = 29,
        k_leftParen = 30,
        k_rightParen = 31,
        k_leftBracket = 32,
        k_rightBracket = 33,
        k_alphaTerminal = 34,
        k_numericTerminal = 35
    };

    template <typename Lexer>
    struct LexerTokens : lex::lexer<Lexer>
    {
        LexerTokens() :
           whiteSpace("[ \t\n]"),
           andTok("and"),
           def("def"),
           elihw("elihw"),
           elseTok("else"),
           falseTok("false"),
           fed("fed"),
           fi("fi"),
           ifTok("if"),
           input("input"),
           notTok("not"),
           orTok("or"),
           print("print"),
           returnTok("return"),
           trueTok("true"),
           whileTok("while"),
           plus("\+"),
           minus("\-"),
           mult("\*"),
           div("\/"),
           bang("\!"),
           equalTo("=="),
           greaterEq(">="),
           lessEq("<="),
           notEq("!="),
           less("<"),
           greater(">"),
           assign("="),
           comma(","),
           colon(":"),
           leftParen("\("),
           rightParen("\)"),
           leftBracket("\["),
           rightBracket("\["),
           alphaTerminal("[a-z][a-zA-Z0-9]*"),
           numericTerminal("[0-9]*")
        {
            this->self("WHITESPACE") = whiteSpace;

            this->self.add
                (andTok, k_andTok)
                (def, k_def)
                (elihw, k_elihw)
                (elseTok, k_elseTok)
                (falseTok, k_falseTok)
                (fed, k_fed)
                (fi, k_fi)
                (ifTok, k_ifTok)
                (andTok, k_andTok)
                (input, k_input)
                (notTok, k_notTok)
                (orTok, k_orTok)
                (print, k_print)
                (returnTok, k_returnTok)
                (trueTok, k_trueTok)
                (whileTok, k_whileTok)
                (plus, k_plues)
                (minus, k_minus)
                (mult, k_mult)
                (div, k_div)
                (bang, k_bang)
                (equalTo, k_equalTo)
                (greaterEq, k_greaterEq)
                (lessEq, k_lessEq)
                (notEq, k_notEq)
                (less, k_less)
                (greater, k_greater)
                (assign, k_assign)
                (comma, k_comma)
                (colon, k_colon)
                (leftParen, k_leftParen)
                (rightParen, k_rightParen)
                (leftBracket, k_leftBracket)
                (rightBracket, k_rightBracket)
                (alphaTerminal, k_alphaTerminal)
                (numericTerminal, k_numericTerminal);
        }

        lex::token_def<lex::omit> whiteSpace;
        lex::token_def<std::string> andTok;
        lex::token_def<std::string> def;
        lex::token_def<std::string> elihw;
        lex::token_def<std::string> elseTok;
        lex::token_def<std::string> falseTok;
        lex::token_def<std::string> fed;
        lex::token_def<std::string> fi;
        lex::token_def<std::string> ifTok;
        lex::token_def<std::string> input;
        lex::token_def<std::string> notTok;
        lex::token_def<std::string> orTok;
        lex::token_def<std::string> print;
        lex::token_def<std::string> returnTok;
        lex::token_def<std::string> trueTok;
        lex::token_def<std::string> whileTok;
        lex::token_def<std::string> plus;
        lex::token_def<std::string> minus;
        lex::token_def<std::string> mult;
        lex::token_def<std::string> div;
        lex::token_def<std::string> bang;
        lex::token_def<std::string> equalTo;
        lex::token_def<std::string> greaterEq;
        lex::token_def<std::string> lessEq;
        lex::token_def<std::string> notEq;
        lex::token_def<std::string> less;
        lex::token_def<std::string> greater;
        lex::token_def<std::string> assign;
        lex::token_def<std::string> comma;
        lex::token_def<std::string> colon;
        lex::token_def<std::string> leftParen;
        lex::token_def<std::string> rightParen;
        lex::token_def<std::string> leftBracket;
        lex::token_def<std::string> rightBracket;
        lex::token_def<std::string> alphaTerminal;
        lex::token_def<std::string> numericTerminal;
    };

这是我的示例测试程序

    int main(int argc, char** argv)
    {
        namespace lex = boost::spirit::lex;
        namespace qi = boost::spirit::qi;

        typedef lex::lexertl::token< char const*, lex::omit, boost::mpl::true_ > token_type;
        typedef lex::lexertl::lexer<token_type> lexer_type;
        typedef interpreter::LexerTokens<lexer_type>::iterator_type iterator_type;
        typedef qi::in_state_skipper<interpreter::LexerTokens<lexer_type>::lexer_def> skipper_type;

        interpreter::LexerTokens< lexer_type > lexer;
        interpreter::InterpreterGrammar< iterator_type, skipper_type > parser(lexer);
        std::string sourceCode("def print_it(x, y): print 3*x + y return fed print_it(8,1) x = 3 print_it(x, x)"); 

        char const* first = sourceCode.c_str();
        char const* last = &first[sourceCode.size()];
        bool r = lex::tokenize_and_phrase_parse(first, last, lexer, parser, qi::in_state("WHITESPACE")[lexer.self]);

        std::cout << "Remaining " << std::string(first,last) << std::endl;
        std::cout << "R is " << r << std::endl;
    }

这些修改引起了几个问题,首先,通过非正式地查看语法,而没有构造完整的 LL(1) 解析 table,我不相信这个语法是 LL(1 ).我仍然需要验证这一点,但我想知道精神,能够解析这个语法吗?我知道 PEG 通常使用 / 运算符进行前瞻,精神目前是否这样做?我在另一个看post那个精神可不可以?

其次,这个语法失败了,我注意到在开始生产时,我简化了开始生产并使其成为:

start %= functionList;

然后将输入更改为:

def print_it(x, y): print 3*x + y return fed

语法调试语句指出解析成功。但是,我看到剩余的字符串是:

print_it(x, y): print 3*x + y return fed

所以实际上只解析了第一个标记。经过一些调试后,我不确定为什么解析成功以及为什么只消耗了一个符号,这可能是词法分析器的问题吗?

此外,当我将开始生产更改为:

时,我看到了类似的结果
start %= endList;

并使用输入

y = x

然而,这无法解析并且只使用字符 y。

最后,我的调试语句的输出不是很有帮助,当 运行 调试语句产生的输出是:

<start>
  <try>[][][][][][][][][][][][][][][][][][][][]</try>
  <fail/>
</start>
Remaining  print_it(x, y): print 3*x + y return fed print_it(8,1) x = 3 print_it(x, x)
R is 0

我认为这意味着在语法中尝试了二十个产生式,从二十个空[],这是一个正确的假设吗?还有为什么 [] 是空的?我通常会看到它们带有一些对调试有用的文本。是不是因为匹配到正则表达式语法就自动成功了?如果是这种情况,有没有办法让调试语句在使用令牌枚举令牌而不是使用宏添加表达式时打印有用的输出?

如有任何帮助或指出正确的方向,我们将不胜感激,谢谢。

Which I assume to mean twenty productions are attempted in the grammar, from the twenty empty [], is that a correct assumption?

没有。 [] 表示输入标记。

Also why are the [] empty?

可能没有有用的方法来打印它们,所以它们显示为空。

If that is the case, is there a way to get the debug statement to print helpful output when using the token enum token as opposed to adding expressions using macros?

我也这么认为。但我从不使用 Lex。所以,可能需要一段时间才能弄清楚。

首先引起我注意的是:

typedef lex::lexertl::token< char const*, lex::omit, boost::mpl::true_ > token_type;

您的 AttributeTypes 显示 omit。的确,改为

typedef lex::lexertl::token< char const*, boost::mpl::vector<lex::omit, std::string>, boost::mpl::true_ > token_type;

确实有生命迹象。对于输入 x=y(无空格!),它打印:

Live On Coliru

<start>
  <try>[y][=][x][][][][][][][][][][][][][][][][][]</try>
  <fail/>
</start>
Remaining 

R is false

现在,对于 def print_it(x, y): print 3*x + y return fed 输入,输出仍然是:

<start>
  <try>[def][][][][][][][][][][][][][][][][][][][]</try>
  <fail/>
</start>
Remaining  print_it(x, y): print 3*x + y return fed

R is false

信息量略多。有趣的是,它似乎在第一个空格上也失败了。 whiteSpace 正则表达式看起来不错,所以我 searched for lex in_state in my answers 记住我曾经学到的关于跳过和 Lex 的知识。

我尝试了一些建议。在第二个 post 之后,我看到了这条评论:

Nice! You can add to your answer that having a separate state inside the lexer for skipping white spaces has another considerable drawback: lack of debugging support. If you use a separate state for skipping and then use BOOST_SPIRIT_DEBUG_NODE, you won't get the full output of the token stream. This is because the default debug handler advances the lexer iterator to get tokens, the lexer is of course in the INITIAL state. As soon as it meets a white space, the iteration stops, because the lexer cannot find a match. The token stream will be cut at the first white space in the rule's trace. –

为了好玩,让我们尝试不使用单独的状态,而是使用来自 Troubles with boost::spirit::lex & whitespace 的状态。

现在,表达式无法解析,因为

  • print_it 不是有效的标识符。让我们将 _ 添加到 alphaTerminal
  • alphaTerminal 需要 = 跟随。稍微修正一下表达式:

    exp = (
                qi::token(k_alphaTerminal)
              | qi::token(k_numericTerminal)
              | qi::token(k_trueTok)
              | qi::token(k_falseTok))
        >> expPrime
        ;
    
  • 事实上,这些令牌 ID 是无效的。您需要从 min_token_id(即 0x10000)开始:

    enum Tokens
    {
        k_andTok = lex::tokenids::min_token_id + 1,
        k_def, k_elihw, k_elseTok, k_falseTok, k_fed, k_fi, k_ifTok, k_input,
        k_notTok, k_orTok, k_print, k_returnTok, k_trueTok, k_whileTok,
        k_plues, k_minus, k_mult, k_div, k_bang, k_equalTo, k_greaterEq,
        k_lessEq, k_notEq, k_less, k_greater, k_assign, k_comma, k_colon,
        k_leftParen, k_rightParen, k_leftBracket, k_rightBracket,
        k_alphaTerminal, k_numericTerminal,
    };
    
  • 你为什么还要指定令牌 ID?根据您正在将对标记定义的引用传递给解析器的事实进行猜测,我认为您将要使用它们?

    exp
        = (tok.alphaTerminal | tok.numericTerminal | tok.trueTok | tok.falseTok) >> expPrime
        ;
    
  • 此外,让我们为所有规则启用调试:

    BOOST_SPIRIT_DEBUG_NODES(
        (start) (functionList) (endList) (endListPrime) (param)
        (paramPrime) (paramList) (paramListPrime) (statements)
        (statementsPrime) (statement) (assignmentStatement)
        (printStatement) (inputStatement) (conditionStatement)
        (whileStatement) (callStatement) (returnStatement) (exp)
        (expPrime) (intList) (intListPrime) (optionalElse) (end)
    )
    

而且,在花了太多时间试图理解为什么状态切换船长似乎不起作用之后,我放弃了。这是我修补的结果:

Live On Coliru

//#define USE_STATES
#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/lex.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/phoenix.hpp>
namespace lex = boost::spirit::lex;

namespace interpreter {
    enum Tokens
    {
        k_andTok = lex::tokenids::min_token_id + 1,
        k_def, k_elihw, k_elseTok, k_falseTok, k_fed, k_fi, k_ifTok, k_input,
        k_notTok, k_orTok, k_print, k_returnTok, k_trueTok, k_whileTok,
        k_plues, k_minus, k_mult, k_div, k_bang, k_equalTo, k_greaterEq,
        k_lessEq, k_notEq, k_less, k_greater, k_assign, k_comma, k_colon,
        k_leftParen, k_rightParen, k_leftBracket, k_rightBracket,
        k_alphaTerminal, k_numericTerminal,
    };

    template <typename Lexer>
    struct LexerTokens : lex::lexer<Lexer>
    {
        LexerTokens() :
           whiteSpace("[ \t\n]"),
           andTok("and"),
           def("def"),
           elihw("elihw"),
           elseTok("else"),
           falseTok("false"),
           fed("fed"),
           fi("fi"),
           ifTok("if"),
           input("input"),
           notTok("not"),
           orTok("or"),
           print("print"),
           returnTok("return"),
           trueTok("true"),
           whileTok("while"),
           plus("\+"),
           minus("\-"),
           mult("\*"),
           div("\/"),
           bang("\!"),
           equalTo("=="),
           greaterEq(">="),
           lessEq("<="),
           notEq("!="),
           less("<"),
           greater(">"),
           assign("="),
           comma(","),
           colon(":"),
           leftParen("\("),
           rightParen("\)"),
           leftBracket("\["),
           rightBracket("\["),
           alphaTerminal("[a-z][a-zA-Z0-9_]*"),
           numericTerminal("[0-9]*")
        {
            this->self.add
                (andTok, k_andTok)
                (def, k_def)
                (elihw, k_elihw)
                (elseTok, k_elseTok)
                (falseTok, k_falseTok)
                (fed, k_fed)
                (fi, k_fi)
                (ifTok, k_ifTok)
                (andTok, k_andTok)
                (input, k_input)
                (notTok, k_notTok)
                (orTok, k_orTok)
                (print, k_print)
                (returnTok, k_returnTok)
                (trueTok, k_trueTok)
                (whileTok, k_whileTok)
                (plus, k_plues)
                (minus, k_minus)
                (mult, k_mult)
                (div, k_div)
                (bang, k_bang)
                (equalTo, k_equalTo)
                (greaterEq, k_greaterEq)
                (lessEq, k_lessEq)
                (notEq, k_notEq)
                (less, k_less)
                (greater, k_greater)
                (assign, k_assign)
                (comma, k_comma)
                (colon, k_colon)
                (leftParen, k_leftParen)
                (rightParen, k_rightParen)
                (leftBracket, k_leftBracket)
                (rightBracket, k_rightBracket)
                (alphaTerminal, k_alphaTerminal)
                (numericTerminal, k_numericTerminal);

#ifdef USE_STATES
            this->self("WHITESPACE") = whiteSpace;
#else
            this->self += whiteSpace [ lex::_pass = lex::pass_flags::pass_ignore ];
#endif
        }

        lex::token_def<lex::omit> whiteSpace;
        lex::token_def<std::string> andTok;
        lex::token_def<std::string> def;
        lex::token_def<std::string> elihw;
        lex::token_def<std::string> elseTok;
        lex::token_def<std::string> falseTok;
        lex::token_def<std::string> fed;
        lex::token_def<std::string> fi;
        lex::token_def<std::string> ifTok;
        lex::token_def<std::string> input;
        lex::token_def<std::string> notTok;
        lex::token_def<std::string> orTok;
        lex::token_def<std::string> print;
        lex::token_def<std::string> returnTok;
        lex::token_def<std::string> trueTok;
        lex::token_def<std::string> whileTok;
        lex::token_def<std::string> plus;
        lex::token_def<std::string> minus;
        lex::token_def<std::string> mult;
        lex::token_def<std::string> div;
        lex::token_def<std::string> bang;
        lex::token_def<std::string> equalTo;
        lex::token_def<std::string> greaterEq;
        lex::token_def<std::string> lessEq;
        lex::token_def<std::string> notEq;
        lex::token_def<std::string> less;
        lex::token_def<std::string> greater;
        lex::token_def<std::string> assign;
        lex::token_def<std::string> comma;
        lex::token_def<std::string> colon;
        lex::token_def<std::string> leftParen;
        lex::token_def<std::string> rightParen;
        lex::token_def<std::string> leftBracket;
        lex::token_def<std::string> rightBracket;
        lex::token_def<std::string> alphaTerminal;
        lex::token_def<std::string> numericTerminal;
    };

    namespace qi = boost::spirit::qi;
    template <typename Iterator, typename Skipper>
    struct InterpreterGrammar : qi::grammar<Iterator, Skipper>
    {
        template <typename TokenDef>
        InterpreterGrammar(TokenDef const& tok)
            : InterpreterGrammar::base_type(start)
        {
            start
                = functionList >> endList >> qi::eoi
                ;

            // different expressions
            exp
                = (tok.alphaTerminal | tok.numericTerminal | tok.trueTok | tok.falseTok) >> expPrime
                ;

            expPrime
                = tok.equalTo   >> exp >> expPrime
                | tok.notEq     >> exp >> expPrime
                | tok.less      >> exp >> expPrime
                | tok.lessEq    >> exp >> expPrime
                | tok.greater   >> exp >> expPrime
                | tok.greaterEq >> exp >> expPrime
                | tok.andTok    >> exp >> expPrime
                | tok.orTok     >> exp >> expPrime
                | tok.notTok    >> exp
                | tok.plus      >> exp >> expPrime
                | tok.minus     >> exp >> expPrime
                | tok.mult      >> exp >> expPrime
                | tok.minus >> exp
                | tok.leftParen >> exp >> tok.rightParen
                | tok.alphaTerminal >> tok.leftBracket >> exp >> tok.rightBracket
                | tok.alphaTerminal >> tok.leftParen >> tok.rightParen
                | tok.alphaTerminal >> tok.leftParen >> exp >> tok.rightParen
                | qi::eps
                ;

            // parameter list
            paramList
                = exp >> paramListPrime
                ;

            paramListPrime
                = tok.comma >> exp >> paramListPrime
                | qi::eps
                ;

            // return statements
            returnStatement
                = tok.returnTok >> exp
                | tok.returnTok
                ;

            // function call statements
            callStatement
                = tok.alphaTerminal >> tok.leftParen >> tok.rightParen
                | tok.alphaTerminal >> tok.leftParen >> paramList >> tok.rightParen
                ;

            // variable assignment
            assignmentStatement
                = tok.alphaTerminal >> tok.assign >> exp
                | tok.alphaTerminal >> tok.leftBracket >> exp
                    >> tok.rightBracket >> tok.assign >> exp
                ;

            // list of integers
            intList
                = tok.numericTerminal >> intListPrime
                ;

            intListPrime
                = tok.comma >> tok.numericTerminal >> intListPrime
                | qi::eps
                ;

            // print out a variable
            printStatement
                = tok.print >> exp
                ;

            // take input
            inputStatement
                = tok.alphaTerminal >> tok.input
                ;

            // conditional statement
            conditionStatement
                = tok.ifTok >> exp >> tok.colon >> statements >> optionalElse
                ;

            // consitions have optional else
            optionalElse
                = tok.elseTok >> tok.colon >> statements
                | qi::eps
                ;

            // while loop
            whileStatement
                = tok.whileTok >> exp >> tok.colon >> statements >> tok.elihw
                ;

            // actual program statements
            endList
                = end >> endListPrime
                ;

            endListPrime
                = end >> endListPrime
                | qi::eps
                ;

            // end possibilities of program in global space
            end
                = callStatement
                | printStatement
                | tok.alphaTerminal >> tok.assign >> tok.input
                | tok.alphaTerminal >> tok.assign >> exp
                | tok.alphaTerminal >> tok.assign >> tok.leftBracket >> intList
                    >> tok.rightBracket
                | tok.alphaTerminal >> tok.leftBracket >> exp >> tok.rightBracket
                    >> tok.assign >> exp
                ;

            // function parameters
            param
                = tok.alphaTerminal >> paramPrime
                | tok.alphaTerminal >> tok.leftBracket >> tok.rightBracket
                    >> paramPrime
                ;

            // for handling left recursion in paramlist
            paramPrime
                = tok.comma >> tok.alphaTerminal >> paramPrime
                | qi::eps
                ;

            // define a statement as assignment print input condition while or call
            statement
                = assignmentStatement
                | printStatement
                | inputStatement
                | conditionStatement
                | whileStatement
                | callStatement
                | returnStatement
                ;

            // general statement list
            statements
                = statement >> statementsPrime
                ;

            // for handling left recursion in statements
            statementsPrime
                = statement >> statementsPrime
                | qi::eps
                ;

            // functions
            functionList
                = tok.def >> tok.alphaTerminal >> tok.leftParen
                    >> param >> tok.rightParen >> tok.colon
                    >> statements >> tok.fed
                | tok.def >> tok.alphaTerminal >> tok.leftParen
                    >> tok.rightParen >> tok.colon >> statements >> tok.fed
                | qi::eps
                ;

            BOOST_SPIRIT_DEBUG_NODES(
                (start) (functionList) (endList) (endListPrime) (param)
                (paramPrime) (paramList) (paramListPrime) (statements)
                (statementsPrime) (statement) (assignmentStatement)
                (printStatement) (inputStatement) (conditionStatement)
                (whileStatement) (callStatement) (returnStatement) (exp)
                (expPrime) (intList) (intListPrime) (optionalElse) (end)
            )

        }
      private:
        qi::rule<Iterator, Skipper> start;
        qi::rule<Iterator, Skipper> functionList;
        qi::rule<Iterator, Skipper> endList;
        qi::rule<Iterator, Skipper> endListPrime;
        qi::rule<Iterator, Skipper> param;
        qi::rule<Iterator, Skipper> paramPrime;
        qi::rule<Iterator, Skipper> paramList;
        qi::rule<Iterator, Skipper> paramListPrime;
        qi::rule<Iterator, Skipper> statements;
        qi::rule<Iterator, Skipper> statementsPrime;
        qi::rule<Iterator, Skipper> statement;
        qi::rule<Iterator, Skipper> assignmentStatement;
        qi::rule<Iterator, Skipper> printStatement;
        qi::rule<Iterator, Skipper> inputStatement;
        qi::rule<Iterator, Skipper> conditionStatement;
        qi::rule<Iterator, Skipper> whileStatement;
        qi::rule<Iterator, Skipper> callStatement;
        qi::rule<Iterator, Skipper> returnStatement;
        qi::rule<Iterator, Skipper> exp;
        qi::rule<Iterator, Skipper> expPrime;
        qi::rule<Iterator, Skipper> intList;
        qi::rule<Iterator, Skipper> intListPrime;
        qi::rule<Iterator, Skipper> optionalElse;
        qi::rule<Iterator, Skipper> end;
    };
}

#include <fstream>
#include <iterator>

int main(int argc, char** argv) {
    namespace lex = boost::spirit::lex;
    namespace qi = boost::spirit::qi;

    typedef lex::lexertl::token<char const*, boost::mpl::vector<lex::omit, std::string>, boost::mpl::true_> token_type;
#ifdef USE_STATES
    typedef lex::lexertl::actor_lexer<token_type> lexer_type;
    typedef qi::in_state_skipper<interpreter::LexerTokens<lexer_type>::lexer_def> skipper_type;
    typedef interpreter::LexerTokens<lexer_type>::iterator_type iterator_type;
#else
    typedef lex::lexertl::actor_lexer<token_type> lexer_type;
    typedef interpreter::LexerTokens<lexer_type>::iterator_type iterator_type;
    typedef qi::unused_type skipper_type;
#endif

    interpreter::LexerTokens<lexer_type> lexer;
    interpreter::InterpreterGrammar<iterator_type, skipper_type> parser(lexer);

    // read the file
    if (argc != 2)
    {
        std::cout << "File required" << std::endl;
        return 1;
    }

    std::ifstream t(argv[1]);
    std::string const sourceCode { std::istreambuf_iterator<char>(t), {} };

    char const* first = sourceCode.data();
    char const* last = first + sourceCode.size();
#ifdef USE_STATES
    bool ok = lex::tokenize_and_phrase_parse(first, last, lexer, parser, qi::in_state("WHITESPACE")[lexer.self]);
#else
    bool ok = lex::tokenize_and_parse(first, last, lexer, parser);
#endif

    std::cout << "Remaining '" << std::string(first,last) << "'" << std::endl;
    std::cout << "R is " << std::boolalpha << ok << std::endl;
}

版画

<start>
  <try>[def][print_it][(][x][,][y][)][:][print][3][*][x][+][y][return][fed]</try>
  <functionList>
    <try>[def][print_it][(][x][,][y][)][:][print][3][*][x][+][y][return][fed]</try>
    <param>
      <try>[x][,][y][)][:][print][3][*][x][+][y][return][fed]</try>
      <paramPrime>
        <try>[,][y][)][:][print][3][*][x][+][y][return][fed]</try>
        <paramPrime>
          <try>[)][:][print][3][*][x][+][y][return][fed]</try>
          <success>[)][:][print][3][*][x][+][y][return][fed]</success>
          <attributes>[]</attributes>
        </paramPrime>
        <success>[)][:][print][3][*][x][+][y][return][fed]</success>
        <attributes>[]</attributes>
      </paramPrime>
      <success>[)][:][print][3][*][x][+][y][return][fed]</success>
      <attributes>[]</attributes>
    </param>
    <statements>
      <try>[print][3][*][x][+][y][return][fed]</try>
      <statement>
        <try>[print][3][*][x][+][y][return][fed]</try>
        <assignmentStatement>
          <try>[print][3][*][x][+][y][return][fed]</try>
          <fail/>
        </assignmentStatement>
        <printStatement>
          <try>[print][3][*][x][+][y][return][fed]</try>
          <exp>
            <try>[3][*][x][+][y][return][fed]</try>
            <expPrime>
              <try>[*][x][+][y][return][fed]</try>
              <exp>
                <try>[x][+][y][return][fed]</try>
                <expPrime>
                  <try>[+][y][return][fed]</try>
                  <exp>
                    <try>[y][return][fed]</try>
                    <expPrime>
                      <try>[return][fed]</try>
                      <success>[return][fed]</success>
                      <attributes>[]</attributes>
                    </expPrime>
                    <success>[return][fed]</success>
                    <attributes>[]</attributes>
                  </exp>
                  <expPrime>
                    <try>[return][fed]</try>
                    <success>[return][fed]</success>
                    <attributes>[]</attributes>
                  </expPrime>
                  <success>[return][fed]</success>
                  <attributes>[]</attributes>
                </expPrime>
                <success>[return][fed]</success>
                <attributes>[]</attributes>
              </exp>
              <expPrime>
                <try>[return][fed]</try>
                <success>[return][fed]</success>
                <attributes>[]</attributes>
              </expPrime>
              <success>[return][fed]</success>
              <attributes>[]</attributes>
            </expPrime>
            <success>[return][fed]</success>
            <attributes>[]</attributes>
          </exp>
          <success>[return][fed]</success>
          <attributes>[]</attributes>
        </printStatement>
        <success>[return][fed]</success>
        <attributes>[]</attributes>
      </statement>
      <statementsPrime>
        <try>[return][fed]</try>
        <statement>
          <try>[return][fed]</try>
          <assignmentStatement>
            <try>[return][fed]</try>
            <fail/>
          </assignmentStatement>
          <printStatement>
            <try>[return][fed]</try>
            <fail/>
          </printStatement>
          <inputStatement>
            <try>[return][fed]</try>
            <fail/>
          </inputStatement>
          <conditionStatement>
            <try>[return][fed]</try>
            <fail/>
          </conditionStatement>
          <whileStatement>
            <try>[return][fed]</try>
            <fail/>
          </whileStatement>
          <callStatement>
            <try>[return][fed]</try>
            <fail/>
          </callStatement>
          <returnStatement>
            <try>[return][fed]</try>
            <exp>
              <try>[fed]</try>
              <fail/>
            </exp>
            <success>[fed]</success>
            <attributes>[]</attributes>
          </returnStatement>
          <success>[fed]</success>
          <attributes>[]</attributes>
        </statement>
        <statementsPrime>
          <try>[fed]</try>
          <statement>
            <try>[fed]</try>
            <assignmentStatement>
              <try>[fed]</try>
              <fail/>
            </assignmentStatement>
            <printStatement>
              <try>[fed]</try>
              <fail/>
            </printStatement>
            <inputStatement>
              <try>[fed]</try>
              <fail/>
            </inputStatement>
            <conditionStatement>
              <try>[fed]</try>
              <fail/>
            </conditionStatement>
            <whileStatement>
              <try>[fed]</try>
              <fail/>
            </whileStatement>
            <callStatement>
              <try>[fed]</try>
              <fail/>
            </callStatement>
            <returnStatement>
              <try>[fed]</try>
              <fail/>
            </returnStatement>
            <fail/>
          </statement>
          <success>[fed]</success>
          <attributes>[]</attributes>
        </statementsPrime>
        <success>[fed]</success>
        <attributes>[]</attributes>
      </statementsPrime>
      <success>[fed]</success>
      <attributes>[]</attributes>
    </statements>
    <success></success>
    <attributes>[]</attributes>
  </functionList>
  <endList>
    <try></try>
    <end>
      <try></try>
      <callStatement>
        <try></try>
        <fail/>
      </callStatement>
      <printStatement>
        <try></try>
        <fail/>
      </printStatement>
      <fail/>
    </end>
    <fail/>
  </endList>
  <fail/>
</start>
Remaining ''
R is false