在解析器中提升 Spirit 段错误

Boost Spirit Segfault In Parser

我一直在尝试将我在本科编译器课程中编写的一些 lex 和 yacc 代码转换为精神代码以学习精神,但我发现了一个我似乎无法弄清楚的段错误。我这样写词法分析器:

namespace lex = boost::spirit::lex;

enum Tokens
{
    k_andTok = 1,
    k_def = 2,
    k_elihw = 3,
    k_elseTok = 4,
    k_falseTok = 5,
    k_fed = 6,
    k_fi = 7,
    k_ifTok = 8,
    k_input = 9,
    k_notTok = 10,
    k_orTok = 11,
    k_print = 12,
    k_returnTok = 13,
    k_trueTok = 14,
    k_whileTok = 15,
    k_plues = 16,
    k_minus = 17,
    k_mult = 18,
    k_div = 19,
    k_bang = 20,
    k_equalTo = 21,
    k_greaterEq = 22,
    k_lessEq = 23,
    k_notEq = 24,
    k_less = 25,
    k_greater = 26,
    k_assign = 27,
    k_comma = 28,
    k_colon = 29,
    k_leftParen = 30,
    k_rightParen = 31,
    k_leftBracket = 32,
    k_rightBracket = 33,
    k_nonTerminal = 34,
    k_terminal = 35
};

template <typename Lexer>
struct LexerTokens : lex::lexer<Lexer>
{
    LexerTokens() :
       whiteSpace("[ \t\n]"),
       andTok("and"),
       def("def"),
       elihw("elihw"),
       elseTok("else"),
       falseTok("false"),
       fed("fed"),
       fi("fi"),
       ifTok("if"),
       input("input"),
       notTok("not"),
       orTok("or"),
       print("print"),
       returnTok("return"),
       trueTok("true"),
       whileTok("while"),
       plus("\+"),
       minus("\-"),
       mult("\*"),
       div("\/"),
       bang("\!"),
       equalTo("=="),
       greaterEq(">="),
       lessEq("<="),
       notEq("!="),
       less("<"),
       greater(">"),
       assign("="),
       comma(","),
       colon(":"),
       leftParen("\("),
       rightParen("\)"),
       leftBracket("\["),
       rightBracket("\["),
       nonTerminal("[a-z][a-zA-Z0-9]*"),
       terminal("[0-9]")
    {
        this->self("WHITESPACE") = whiteSpace;

        this->self.add
            (andTok, k_andTok)
            (def, k_def)
            (elihw, k_elihw)
            (elseTok, k_elseTok)
            (falseTok, k_falseTok)
            (fed, k_fed)
            (fi, k_fi)
            (ifTok, k_ifTok)
            (andTok, k_andTok)
            (input, k_input)
            (notTok, k_notTok)
            (orTok, k_orTok)
            (print, k_print)
            (returnTok, k_returnTok)
            (trueTok, k_trueTok)
            (whileTok, k_whileTok)
            (plus, k_plues)
            (minus, k_minus)
            (mult, k_mult)
            (div, k_div)
            (bang, k_bang)
            (equalTo, k_equalTo)
            (greaterEq, k_greaterEq)
            (lessEq, k_lessEq)
            (notEq, k_notEq)
            (less, k_less)
            (greater, k_greater)
            (assign, k_assign)
            (comma, k_comma)
            (colon, k_colon)
            (leftParen, k_leftParen)
            (rightParen, k_rightParen)
            (leftBracket, k_leftBracket)
            (rightBracket, k_rightBracket)
            (nonTerminal, k_nonTerminal)
            (terminal, k_terminal);
    }

    lex::token_def<lex::omit> whiteSpace;
    lex::token_def<std::string> andTok;
    lex::token_def<std::string> def;
    lex::token_def<std::string> elihw;
    lex::token_def<std::string> elseTok;
    lex::token_def<std::string> falseTok;
    lex::token_def<std::string> fed;
    lex::token_def<std::string> fi;
    lex::token_def<std::string> ifTok;
    lex::token_def<std::string> input;
    lex::token_def<std::string> notTok;
    lex::token_def<std::string> orTok;
    lex::token_def<std::string> print;
    lex::token_def<std::string> returnTok;
    lex::token_def<std::string> trueTok;
    lex::token_def<std::string> whileTok;
    lex::token_def<std::string> plus;
    lex::token_def<std::string> minus;
    lex::token_def<std::string> mult;
    lex::token_def<std::string> div;
    lex::token_def<std::string> bang;
    lex::token_def<std::string> equalTo;
    lex::token_def<std::string> greaterEq;
    lex::token_def<std::string> lessEq;
    lex::token_def<std::string> notEq;
    lex::token_def<std::string> less;
    lex::token_def<std::string> greater;
    lex::token_def<std::string> assign;
    lex::token_def<std::string> comma;
    lex::token_def<std::string> colon;
    lex::token_def<std::string> leftParen;
    lex::token_def<std::string> rightParen;
    lex::token_def<std::string> leftBracket;
    lex::token_def<std::string> rightBracket;
    lex::token_def<std::string> nonTerminal;
    lex::token_def<std::string> terminal;
};

和解析器

namespace qi = boost::spirit::qi;
template <typename Iterator, typename Skipper>
struct InterpreterGrammar : qi::grammar<Iterator, Skipper>
{        
//    using boost::phoenix::ref;
//    using boost::phoenix::size;

    template <typename TokenDef>
    InterpreterGrammar(TokenDef const& tok)
        : InterpreterGrammar::base_type(start),
        connect(0)
    {
        start %= functionList >> endList >> qi::eoi;

        // different expressions
        exp %= exp >> qi::token(k_equalTo) >> exp
              |
              exp >> qi::token(k_notEq) >> exp
              |
              exp >> qi::token(k_less) >> exp
              |
              exp >> qi::token(k_lessEq) >> exp
              |
              exp >> qi::token(k_greater) >> exp
              |
              exp >> qi::token(k_greaterEq) >> exp
              |
              exp >> qi::token(k_andTok) >> exp
              |
              exp >> qi::token(k_orTok) >> exp
              |
              qi::token(k_notTok) >> exp 
              |
              exp >> qi::token(k_plues) >> exp
              |
              exp >> qi::token(k_minus) >> exp
              |
              exp >> qi::token(k_mult) >> exp
              |
              qi::token(k_minus) >> exp
              |
              qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
              |
              qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket) 
              |
              qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
              |
              qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
              |
              qi::token(k_nonTerminal)
              |
              qi::token(k_terminal)
              |
              qi::token(k_trueTok)
              |
              qi::token(k_falseTok);

        // parameter list
        paramList %= paramList >> qi::token(k_comma) >> exp
                    |
                    exp;

        // return statements
        returnStatement %= returnStatement >> exp
                         |
                         returnStatement;

        // function call statements
        callStatement %= qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
                        |
                        qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> paramList >> qi::token(k_rightParen);

        // variable assignment
        assignmentStatement %= qi::token(k_nonTerminal) >> qi::token(k_assign) >> exp
                              |
                              qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> exp
                                  >> qi::token(k_rightBracket) >> qi::token(k_assign) >> exp;

        // list of integers
        intList %= intList >> qi::token(k_comma) >> qi::token(k_terminal)
                  |
                  qi::token(k_terminal);

        // print out a variable
        printStatement %= qi::token(k_print) >> exp;

        // take input
        inputStatement %= qi::token(k_nonTerminal) >> qi::token(k_input);

        // conditional statement
        conditionStatement %= qi::token(k_ifTok) >> exp >> qi::token(k_colon) >> statements >> optionalElse;

        // consitions have optional else
        optionalElse %= qi::token(k_elseTok) >> qi::token(k_colon) >> statements
                       |
                       qi::eps;

        // while loop
        whileStatement %= qi::token(k_whileTok) >> exp >> qi::token(k_colon) >> statements >> qi::token(k_elihw);

        // actual program statements
        endList %= endList >> end
                  |
                  end;

        // end possibilities of program in global space
        end %= callStatement
              |
              printStatement
              |
              qi::token(k_nonTerminal) >> qi::token(k_assign) >> qi::token(k_input)
              |
              qi::token(k_nonTerminal) >> qi::token(k_assign) >> exp
              |
              qi::token(k_nonTerminal) >> qi::token(k_assign) >> qi::token(k_leftBracket) >> intList
                  >> qi::token(k_rightBracket)
              |
              qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket)
                  >> qi::token(k_assign) >> exp;

        // function parameters
        paramList %= paramList >> qi::token(k_comma) >> qi::token(k_nonTerminal)
                    |
                    qi::token(k_nonTerminal)
                    |
                    qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> qi::token(k_rightBracket);

        // define a statement as assignment print input condition while or call
        statement %= assignmentStatement
                    |
                    printStatement
                    |
                    inputStatement
                    |
                    conditionStatement
                    |
                    whileStatement
                    |
                    callStatement
                    |
                    returnStatement;

        // general statement list
        statements %= statements >> statement
                     |
                     statement;

        // functions
        functionList %= qi::token(k_def) >> qi::token(k_nonTerminal) >> qi::token(k_leftParen)
                           >> paramList >> qi::token(k_rightParen) >> qi::token(k_colon)
                           >> statements >> qi::token(k_fed)
                       |
                       qi::token(k_def) >> qi::token(k_nonTerminal) >> qi::token(k_leftParen)
                           >> qi::token(k_rightParen) >> qi::token(k_colon) >> statements >> qi::token(k_fed);
                       | qi::eps;

        BOOST_SPIRIT_DEBUG_NODES((start)(functionList));
        debug(start);
    }

    qi::rule<Iterator, Skipper> start;
    qi::rule<Iterator, Skipper> functionList;
    qi::rule<Iterator, Skipper> endList;
    qi::rule<Iterator, Skipper> paramList;
    qi::rule<Iterator, Skipper> statements;
    qi::rule<Iterator, Skipper> statement;
    qi::rule<Iterator, Skipper> assignmentStatement;
    qi::rule<Iterator, Skipper> printStatement;
    qi::rule<Iterator, Skipper> inputStatement;
    qi::rule<Iterator, Skipper> conditionStatement;
    qi::rule<Iterator, Skipper> whileStatement;
    qi::rule<Iterator, Skipper> callStatement;
    qi::rule<Iterator, Skipper> returnStatement;
    qi::rule<Iterator, Skipper> exp;
    qi::rule<Iterator, Skipper> intList;
    qi::rule<Iterator, Skipper> optionalElse;
    qi::rule<Iterator, Skipper> end;
};

和主要部分

int main(int argc, char** argv)
{
namespace lex = boost::spirit::lex;
namespace qi = boost::spirit::qi;

typedef lex::lexertl::token< char const*, lex::omit, boost::mpl::true_ > token_type;
typedef lex::lexertl::lexer<token_type> lexer_type;
typedef interpreter::LexerTokens<lexer_type>::iterator_type iterator_type;
typedef qi::in_state_skipper<interpreter::LexerTokens<lexer_type>::lexer_def> skipper_type;

LexerTokens< lexer_type > lexer;
InterpreterGrammar< iterator_type, skipper_type > parser(lexer);

// read the file
if (argc != 2)
{
    std::cout << "File required" << std::endl;
    return 1;
}

std::ifstream t(argv[1]); 

t.seekg(0, std::ios::end);   
sourceCode.reserve(t.tellg());
t.seekg(0, std::ios::beg);

sourceCode.assign(std::istreambuf_iterator<char>(t), 
                  std::istreambuf_iterator<char>());

char const* first = sourceCode.c_str();
char const* last = &first[sourceCode.size()];
bool r = lex::tokenize_and_phrase_parse(first, last, lexer, parser, qi::in_state("WHITESPACE")[lexer.self]);

std::cout << "Remaining " << std::string(first,last) << std::endl;
std::cout << "R is " << r << std::endl;
}

该语言的示例是:

def add(x,y) :                                                                                                                              
  if (x <= 0) : return y fi
   return 1 + add(x-1,y) 
fed
y = add(5,4)
print y

我运行遇到的错误是调用语法时的解析器段错误。

我看到如果我

语法不会出现段错误并正确解析表达式。

当我 运行 调试器中的代码时,我在 代码段错误,一个大的表达式被打印出来,所有的成员都有一个 字符串说,

error reading variable: Cannot access memory at address 0x7fffff7fefe0

我检查了其他类似的帖子,其中的错误是精神上的段错误, 然而,

任何正确方向的观点,或对当前代码的批评都是 非常感谢。

如果你使用 AddressSanitizer,它会告诉你:

<start>...
  <try>[]</try>...
ASAN:DEADLYSIGNAL...
=================================================================...
==8985==ERROR: AddressSanitizer: stack-overflow on address 0x7ffeb280dfc8 (pc 0x0000004c9cf6 bp 0x7f...
    #0 0x4c9cf5 in __asan_memcpy (/home/sehe/Projects/Whosebug/sotest+0x4c9cf5)...
    #1 0x68eb77 in bool boost::spirit::any_if<boost::spirit::traits::attribute_not_unused<boost::spi...
    #2 0x68e844 in bool boost::spirit::qi::sequence_base<boost::spirit::qi::sequence<boost::fusion::...
    #3 0x68e487 in bool boost::spirit::qi::sequence_base<boost::spirit::qi::sequence<boost::fusion::...
    #4 0x68e190 in bool boost::spirit::qi::detail::alternative_function<boost::spirit::lex::lexertl:...
    #5 0x68de4a in bool boost::spirit::qi::detail::alternative_function<boost::spirit::lex::lexertl:...
    #6 0x68d8b5 in bool boost::spirit::qi::detail::alternative_function<boost::spirit::lex::lexertl:...
    #7 0x6e085c in bool boost::fusion::detail::linear_any<boost::fusion::cons_iterator<boost::fusion...
    #8 0x6e053f in bool boost::fusion::detail::any<boost::fusion::cons<boost::spirit::qi::sequence<b...
    #9 0x6e0218 in bool boost::fusion::any<boost::fusion::cons<boost::spirit::qi::sequence<boost::fu...
    #10 0x6dffc5 in bool boost::spirit::qi::alternative<boost::fusion::cons<boost::spirit::qi::seque...
    #11 0x6dfbf7 in bool boost::spirit::qi::detail::parser_binder<boost::spirit::qi::alternative<boo...
    #12 0x6de330 in boost::detail::function::function_obj_invoker4<boost::spirit::qi::detail::parser...
    #13 0x5d633a in boost::function4<bool, boost::spirit::lex::lexertl::iterator<boost::spirit::lex:...
    #14 0x5d58e8 in bool boost::spirit::qi::rule<boost::spirit::lex::lexertl::iterator<boost::spirit...
    #15 0x5d54e9 in bool boost::spirit::qi::reference<boost::spirit::qi::rule<boost::spirit::lex::le...
    #16 0x5d49bf in bool boost::spirit::qi::detail::fail_function<boost::spirit::lex::lexertl::itera...
    #17 0x68f56c in bool boost::fusion::detail::linear_any<boost::fusion::cons_iterator<boost::fusio...
    #18 0x68f267 in bool boost::fusion::detail::any<boost::fusion::cons<boost::spirit::qi::reference...
    #19 0x68ef6e in bool boost::fusion::any<boost::fusion::cons<boost::spirit::qi::reference<boost::...
    #20 0x68ebae in bool boost::spirit::any_if<boost::spirit::traits::attribute_not_unused<boost::sp...
    #21 0x68e844 in bool boost::spirit::qi::sequence_base<boost::spirit::qi::sequence<boost::fusion:...
    [ snip repeated frames ]
    #250 0x68e487 in bool boost::spirit::qi::sequence_base<boost::spirit::qi::sequence<boost::fusion...


SUMMARY: AddressSanitizer: stack-overflow (/home/sehe/Projects/Whosebug/sotest+0x4c9cf5) in __a...
==8985==ABORTING...

所以,这显然是左递归导致堆栈溢出。

其他解析器生成器处理它的事实意义不大:Spirit 是一个 PEG 解析器生成器,左递归是不可能的。

你需要重写

    exp %= exp >> qi::token(k_equalTo) >> exp

进入使 lhs 更具体的东西。

注意:我不得不修复一些与您呈现代码的方式有关的随机问题。这是我用来重现的:

Live On Coliru

#include <boost/spirit/include/lex.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
namespace lex = boost::spirit::lex;

namespace interpreter {
    enum Tokens
    {
        k_andTok = 1,
        k_def = 2,
        k_elihw = 3,
        k_elseTok = 4,
        k_falseTok = 5,
        k_fed = 6,
        k_fi = 7,
        k_ifTok = 8,
        k_input = 9,
        k_notTok = 10,
        k_orTok = 11,
        k_print = 12,
        k_returnTok = 13,
        k_trueTok = 14,
        k_whileTok = 15,
        k_plues = 16,
        k_minus = 17,
        k_mult = 18,
        k_div = 19,
        k_bang = 20,
        k_equalTo = 21,
        k_greaterEq = 22,
        k_lessEq = 23,
        k_notEq = 24,
        k_less = 25,
        k_greater = 26,
        k_assign = 27,
        k_comma = 28,
        k_colon = 29,
        k_leftParen = 30,
        k_rightParen = 31,
        k_leftBracket = 32,
        k_rightBracket = 33,
        k_nonTerminal = 34,
        k_terminal = 35
    };

    template <typename Lexer>
    struct LexerTokens : lex::lexer<Lexer>
    {
        LexerTokens() :
           whiteSpace("[ \t\n]"),
           andTok("and"),
           def("def"),
           elihw("elihw"),
           elseTok("else"),
           falseTok("false"),
           fed("fed"),
           fi("fi"),
           ifTok("if"),
           input("input"),
           notTok("not"),
           orTok("or"),
           print("print"),
           returnTok("return"),
           trueTok("true"),
           whileTok("while"),
           plus("\+"),
           minus("\-"),
           mult("\*"),
           div("\/"),
           bang("\!"),
           equalTo("=="),
           greaterEq(">="),
           lessEq("<="),
           notEq("!="),
           less("<"),
           greater(">"),
           assign("="),
           comma(","),
           colon(":"),
           leftParen("\("),
           rightParen("\)"),
           leftBracket("\["),
           rightBracket("\["),
           nonTerminal("[a-z][a-zA-Z0-9]*"),
           terminal("[0-9]")
        {
            this->self("WHITESPACE") = whiteSpace;

            this->self.add
                (andTok, k_andTok)
                (def, k_def)
                (elihw, k_elihw)
                (elseTok, k_elseTok)
                (falseTok, k_falseTok)
                (fed, k_fed)
                (fi, k_fi)
                (ifTok, k_ifTok)
                (andTok, k_andTok)
                (input, k_input)
                (notTok, k_notTok)
                (orTok, k_orTok)
                (print, k_print)
                (returnTok, k_returnTok)
                (trueTok, k_trueTok)
                (whileTok, k_whileTok)
                (plus, k_plues)
                (minus, k_minus)
                (mult, k_mult)
                (div, k_div)
                (bang, k_bang)
                (equalTo, k_equalTo)
                (greaterEq, k_greaterEq)
                (lessEq, k_lessEq)
                (notEq, k_notEq)
                (less, k_less)
                (greater, k_greater)
                (assign, k_assign)
                (comma, k_comma)
                (colon, k_colon)
                (leftParen, k_leftParen)
                (rightParen, k_rightParen)
                (leftBracket, k_leftBracket)
                (rightBracket, k_rightBracket)
                (nonTerminal, k_nonTerminal)
                (terminal, k_terminal);
        }

        lex::token_def<lex::omit> whiteSpace;
        lex::token_def<std::string> andTok;
        lex::token_def<std::string> def;
        lex::token_def<std::string> elihw;
        lex::token_def<std::string> elseTok;
        lex::token_def<std::string> falseTok;
        lex::token_def<std::string> fed;
        lex::token_def<std::string> fi;
        lex::token_def<std::string> ifTok;
        lex::token_def<std::string> input;
        lex::token_def<std::string> notTok;
        lex::token_def<std::string> orTok;
        lex::token_def<std::string> print;
        lex::token_def<std::string> returnTok;
        lex::token_def<std::string> trueTok;
        lex::token_def<std::string> whileTok;
        lex::token_def<std::string> plus;
        lex::token_def<std::string> minus;
        lex::token_def<std::string> mult;
        lex::token_def<std::string> div;
        lex::token_def<std::string> bang;
        lex::token_def<std::string> equalTo;
        lex::token_def<std::string> greaterEq;
        lex::token_def<std::string> lessEq;
        lex::token_def<std::string> notEq;
        lex::token_def<std::string> less;
        lex::token_def<std::string> greater;
        lex::token_def<std::string> assign;
        lex::token_def<std::string> comma;
        lex::token_def<std::string> colon;
        lex::token_def<std::string> leftParen;
        lex::token_def<std::string> rightParen;
        lex::token_def<std::string> leftBracket;
        lex::token_def<std::string> rightBracket;
        lex::token_def<std::string> nonTerminal;
        lex::token_def<std::string> terminal;
    };

    namespace qi = boost::spirit::qi;
    template <typename Iterator, typename Skipper>
    struct InterpreterGrammar : qi::grammar<Iterator, Skipper>
    {        
    //    using boost::phoenix::ref;
    //    using boost::phoenix::size;

        template <typename TokenDef>
        InterpreterGrammar(TokenDef const& )
            : InterpreterGrammar::base_type(start)
              //, connect(0)
        {
            start 
                = functionList >> endList >> qi::eoi
                ;

            // different expressions
            exp = exp >> qi::token(k_equalTo) >> exp
                | exp >> qi::token(k_notEq) >> exp
                | exp >> qi::token(k_less) >> exp
                | exp >> qi::token(k_lessEq) >> exp
                | exp >> qi::token(k_greater) >> exp
                | exp >> qi::token(k_greaterEq) >> exp
                | exp >> qi::token(k_andTok) >> exp
                | exp >> qi::token(k_orTok) >> exp
                | qi::token(k_notTok) >> exp 
                | exp >> qi::token(k_plues) >> exp
                | exp >> qi::token(k_minus) >> exp
                | exp >> qi::token(k_mult) >> exp
                | qi::token(k_minus) >> exp
                | qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
                | qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket) 
                | qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
                | qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
                | qi::token(k_nonTerminal)
                | qi::token(k_terminal)
                | qi::token(k_trueTok)
                | qi::token(k_falseTok)
                ;

            // parameter list
            paramList 
                = paramList >> qi::token(k_comma) >> exp
                | exp
                ;

            // return statements
            returnStatement 
                = returnStatement >> exp
                | returnStatement
                ;

            // function call statements
            callStatement 
                = qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
                | qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> paramList >> qi::token(k_rightParen)
                ;

            // variable assignment
            assignmentStatement 
                = qi::token(k_nonTerminal) >> qi::token(k_assign) >> exp
                | qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> exp
                    >> qi::token(k_rightBracket) >> qi::token(k_assign) >> exp
                ;

            // list of integers
            intList 
                = intList >> qi::token(k_comma) >> qi::token(k_terminal)
                | qi::token(k_terminal)
                ;

            // print out a variable
            printStatement 
                = qi::token(k_print) >> exp
                ;

            // take input
            inputStatement 
                = qi::token(k_nonTerminal) >> qi::token(k_input)
                ;

            // conditional statement
            conditionStatement 
                = qi::token(k_ifTok) >> exp >> qi::token(k_colon) >> statements >> optionalElse
                ;

            // consitions have optional else
            optionalElse 
                = qi::token(k_elseTok) >> qi::token(k_colon) >> statements
                | qi::eps
                ;

            // while loop
            whileStatement 
                = qi::token(k_whileTok) >> exp >> qi::token(k_colon) >> statements >> qi::token(k_elihw)
                ;

            // actual program statements
            endList 
                = endList >> end
                | end
                ;

            // end possibilities of program in global space
            end = callStatement
                | printStatement
                | qi::token(k_nonTerminal) >> qi::token(k_assign) >> qi::token(k_input)
                | qi::token(k_nonTerminal) >> qi::token(k_assign) >> exp
                | qi::token(k_nonTerminal) >> qi::token(k_assign) >> qi::token(k_leftBracket) >> intList
                    >> qi::token(k_rightBracket)
                | qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket)
                    >> qi::token(k_assign) >> exp
                ;

            // function parameters
            paramList 
                = paramList >> qi::token(k_comma) >> qi::token(k_nonTerminal)
                | qi::token(k_nonTerminal)
                | qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> qi::token(k_rightBracket)
                ;

            // define a statement as assignment print input condition while or call
            statement 
                = assignmentStatement
                | printStatement
                | inputStatement
                | conditionStatement
                | whileStatement
                | callStatement
                | returnStatement
                ;

            // general statement list
            statements 
                = statements >> statement
                | statement
                ;

            // functions
            functionList 
                = qi::token(k_def) >> qi::token(k_nonTerminal) >> qi::token(k_leftParen)
                               >> paramList >> qi::token(k_rightParen) >> qi::token(k_colon)
                               >> statements >> qi::token(k_fed)
                | qi::token(k_def) >> qi::token(k_nonTerminal) >> qi::token(k_leftParen)
                               >> qi::token(k_rightParen) >> qi::token(k_colon) >> statements >> qi::token(k_fed)
                | qi::eps
                ;

            BOOST_SPIRIT_DEBUG_NODES((start)(functionList));
        }

        qi::rule<Iterator, Skipper> start;
        qi::rule<Iterator, Skipper> functionList;
        qi::rule<Iterator, Skipper> endList;
        qi::rule<Iterator, Skipper> paramList;
        qi::rule<Iterator, Skipper> statements;
        qi::rule<Iterator, Skipper> statement;
        qi::rule<Iterator, Skipper> assignmentStatement;
        qi::rule<Iterator, Skipper> printStatement;
        qi::rule<Iterator, Skipper> inputStatement;
        qi::rule<Iterator, Skipper> conditionStatement;
        qi::rule<Iterator, Skipper> whileStatement;
        qi::rule<Iterator, Skipper> callStatement;
        qi::rule<Iterator, Skipper> returnStatement;
        qi::rule<Iterator, Skipper> exp;
        qi::rule<Iterator, Skipper> intList;
        qi::rule<Iterator, Skipper> optionalElse;
        qi::rule<Iterator, Skipper> end;
    };
}

#include <fstream>
#include <iterator>

int main(int argc, char** argv) {
    namespace lex = boost::spirit::lex;
    namespace qi = boost::spirit::qi;

    typedef lex::lexertl::token< char const*, lex::omit, boost::mpl::true_ > token_type;
    typedef lex::lexertl::lexer<token_type> lexer_type;
    typedef interpreter::LexerTokens<lexer_type>::iterator_type iterator_type;
    typedef qi::in_state_skipper<interpreter::LexerTokens<lexer_type>::lexer_def> skipper_type;

    interpreter::LexerTokens< lexer_type > lexer;
    interpreter::InterpreterGrammar< iterator_type, skipper_type > parser(lexer);

    // read the file
    if (argc != 2)
    {
        std::cout << "File required" << std::endl;
        return 1;
    }

    std::ifstream t(argv[1]); 
    std::string const sourceCode { std::istreambuf_iterator<char>(t), {} };

    char const* first = sourceCode.data();
    char const* last = first + sourceCode.size();
    bool r = lex::tokenize_and_phrase_parse(first, last, lexer, parser, qi::in_state("WHITESPACE")[lexer.self]);

    std::cout << "Remaining " << std::string(first,last) << std::endl;
    std::cout << "R is " << r << std::endl;
}