在解析器中提升 Spirit 段错误
Boost Spirit Segfault In Parser
我一直在尝试将我在本科编译器课程中编写的一些 lex 和 yacc 代码转换为精神代码以学习精神,但我发现了一个我似乎无法弄清楚的段错误。我这样写词法分析器:
namespace lex = boost::spirit::lex;
enum Tokens
{
k_andTok = 1,
k_def = 2,
k_elihw = 3,
k_elseTok = 4,
k_falseTok = 5,
k_fed = 6,
k_fi = 7,
k_ifTok = 8,
k_input = 9,
k_notTok = 10,
k_orTok = 11,
k_print = 12,
k_returnTok = 13,
k_trueTok = 14,
k_whileTok = 15,
k_plues = 16,
k_minus = 17,
k_mult = 18,
k_div = 19,
k_bang = 20,
k_equalTo = 21,
k_greaterEq = 22,
k_lessEq = 23,
k_notEq = 24,
k_less = 25,
k_greater = 26,
k_assign = 27,
k_comma = 28,
k_colon = 29,
k_leftParen = 30,
k_rightParen = 31,
k_leftBracket = 32,
k_rightBracket = 33,
k_nonTerminal = 34,
k_terminal = 35
};
template <typename Lexer>
struct LexerTokens : lex::lexer<Lexer>
{
LexerTokens() :
whiteSpace("[ \t\n]"),
andTok("and"),
def("def"),
elihw("elihw"),
elseTok("else"),
falseTok("false"),
fed("fed"),
fi("fi"),
ifTok("if"),
input("input"),
notTok("not"),
orTok("or"),
print("print"),
returnTok("return"),
trueTok("true"),
whileTok("while"),
plus("\+"),
minus("\-"),
mult("\*"),
div("\/"),
bang("\!"),
equalTo("=="),
greaterEq(">="),
lessEq("<="),
notEq("!="),
less("<"),
greater(">"),
assign("="),
comma(","),
colon(":"),
leftParen("\("),
rightParen("\)"),
leftBracket("\["),
rightBracket("\["),
nonTerminal("[a-z][a-zA-Z0-9]*"),
terminal("[0-9]")
{
this->self("WHITESPACE") = whiteSpace;
this->self.add
(andTok, k_andTok)
(def, k_def)
(elihw, k_elihw)
(elseTok, k_elseTok)
(falseTok, k_falseTok)
(fed, k_fed)
(fi, k_fi)
(ifTok, k_ifTok)
(andTok, k_andTok)
(input, k_input)
(notTok, k_notTok)
(orTok, k_orTok)
(print, k_print)
(returnTok, k_returnTok)
(trueTok, k_trueTok)
(whileTok, k_whileTok)
(plus, k_plues)
(minus, k_minus)
(mult, k_mult)
(div, k_div)
(bang, k_bang)
(equalTo, k_equalTo)
(greaterEq, k_greaterEq)
(lessEq, k_lessEq)
(notEq, k_notEq)
(less, k_less)
(greater, k_greater)
(assign, k_assign)
(comma, k_comma)
(colon, k_colon)
(leftParen, k_leftParen)
(rightParen, k_rightParen)
(leftBracket, k_leftBracket)
(rightBracket, k_rightBracket)
(nonTerminal, k_nonTerminal)
(terminal, k_terminal);
}
lex::token_def<lex::omit> whiteSpace;
lex::token_def<std::string> andTok;
lex::token_def<std::string> def;
lex::token_def<std::string> elihw;
lex::token_def<std::string> elseTok;
lex::token_def<std::string> falseTok;
lex::token_def<std::string> fed;
lex::token_def<std::string> fi;
lex::token_def<std::string> ifTok;
lex::token_def<std::string> input;
lex::token_def<std::string> notTok;
lex::token_def<std::string> orTok;
lex::token_def<std::string> print;
lex::token_def<std::string> returnTok;
lex::token_def<std::string> trueTok;
lex::token_def<std::string> whileTok;
lex::token_def<std::string> plus;
lex::token_def<std::string> minus;
lex::token_def<std::string> mult;
lex::token_def<std::string> div;
lex::token_def<std::string> bang;
lex::token_def<std::string> equalTo;
lex::token_def<std::string> greaterEq;
lex::token_def<std::string> lessEq;
lex::token_def<std::string> notEq;
lex::token_def<std::string> less;
lex::token_def<std::string> greater;
lex::token_def<std::string> assign;
lex::token_def<std::string> comma;
lex::token_def<std::string> colon;
lex::token_def<std::string> leftParen;
lex::token_def<std::string> rightParen;
lex::token_def<std::string> leftBracket;
lex::token_def<std::string> rightBracket;
lex::token_def<std::string> nonTerminal;
lex::token_def<std::string> terminal;
};
和解析器
namespace qi = boost::spirit::qi;
template <typename Iterator, typename Skipper>
struct InterpreterGrammar : qi::grammar<Iterator, Skipper>
{
// using boost::phoenix::ref;
// using boost::phoenix::size;
template <typename TokenDef>
InterpreterGrammar(TokenDef const& tok)
: InterpreterGrammar::base_type(start),
connect(0)
{
start %= functionList >> endList >> qi::eoi;
// different expressions
exp %= exp >> qi::token(k_equalTo) >> exp
|
exp >> qi::token(k_notEq) >> exp
|
exp >> qi::token(k_less) >> exp
|
exp >> qi::token(k_lessEq) >> exp
|
exp >> qi::token(k_greater) >> exp
|
exp >> qi::token(k_greaterEq) >> exp
|
exp >> qi::token(k_andTok) >> exp
|
exp >> qi::token(k_orTok) >> exp
|
qi::token(k_notTok) >> exp
|
exp >> qi::token(k_plues) >> exp
|
exp >> qi::token(k_minus) >> exp
|
exp >> qi::token(k_mult) >> exp
|
qi::token(k_minus) >> exp
|
qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
|
qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket)
|
qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
|
qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
|
qi::token(k_nonTerminal)
|
qi::token(k_terminal)
|
qi::token(k_trueTok)
|
qi::token(k_falseTok);
// parameter list
paramList %= paramList >> qi::token(k_comma) >> exp
|
exp;
// return statements
returnStatement %= returnStatement >> exp
|
returnStatement;
// function call statements
callStatement %= qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
|
qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> paramList >> qi::token(k_rightParen);
// variable assignment
assignmentStatement %= qi::token(k_nonTerminal) >> qi::token(k_assign) >> exp
|
qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> exp
>> qi::token(k_rightBracket) >> qi::token(k_assign) >> exp;
// list of integers
intList %= intList >> qi::token(k_comma) >> qi::token(k_terminal)
|
qi::token(k_terminal);
// print out a variable
printStatement %= qi::token(k_print) >> exp;
// take input
inputStatement %= qi::token(k_nonTerminal) >> qi::token(k_input);
// conditional statement
conditionStatement %= qi::token(k_ifTok) >> exp >> qi::token(k_colon) >> statements >> optionalElse;
// consitions have optional else
optionalElse %= qi::token(k_elseTok) >> qi::token(k_colon) >> statements
|
qi::eps;
// while loop
whileStatement %= qi::token(k_whileTok) >> exp >> qi::token(k_colon) >> statements >> qi::token(k_elihw);
// actual program statements
endList %= endList >> end
|
end;
// end possibilities of program in global space
end %= callStatement
|
printStatement
|
qi::token(k_nonTerminal) >> qi::token(k_assign) >> qi::token(k_input)
|
qi::token(k_nonTerminal) >> qi::token(k_assign) >> exp
|
qi::token(k_nonTerminal) >> qi::token(k_assign) >> qi::token(k_leftBracket) >> intList
>> qi::token(k_rightBracket)
|
qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket)
>> qi::token(k_assign) >> exp;
// function parameters
paramList %= paramList >> qi::token(k_comma) >> qi::token(k_nonTerminal)
|
qi::token(k_nonTerminal)
|
qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> qi::token(k_rightBracket);
// define a statement as assignment print input condition while or call
statement %= assignmentStatement
|
printStatement
|
inputStatement
|
conditionStatement
|
whileStatement
|
callStatement
|
returnStatement;
// general statement list
statements %= statements >> statement
|
statement;
// functions
functionList %= qi::token(k_def) >> qi::token(k_nonTerminal) >> qi::token(k_leftParen)
>> paramList >> qi::token(k_rightParen) >> qi::token(k_colon)
>> statements >> qi::token(k_fed)
|
qi::token(k_def) >> qi::token(k_nonTerminal) >> qi::token(k_leftParen)
>> qi::token(k_rightParen) >> qi::token(k_colon) >> statements >> qi::token(k_fed);
| qi::eps;
BOOST_SPIRIT_DEBUG_NODES((start)(functionList));
debug(start);
}
qi::rule<Iterator, Skipper> start;
qi::rule<Iterator, Skipper> functionList;
qi::rule<Iterator, Skipper> endList;
qi::rule<Iterator, Skipper> paramList;
qi::rule<Iterator, Skipper> statements;
qi::rule<Iterator, Skipper> statement;
qi::rule<Iterator, Skipper> assignmentStatement;
qi::rule<Iterator, Skipper> printStatement;
qi::rule<Iterator, Skipper> inputStatement;
qi::rule<Iterator, Skipper> conditionStatement;
qi::rule<Iterator, Skipper> whileStatement;
qi::rule<Iterator, Skipper> callStatement;
qi::rule<Iterator, Skipper> returnStatement;
qi::rule<Iterator, Skipper> exp;
qi::rule<Iterator, Skipper> intList;
qi::rule<Iterator, Skipper> optionalElse;
qi::rule<Iterator, Skipper> end;
};
和主要部分
int main(int argc, char** argv)
{
namespace lex = boost::spirit::lex;
namespace qi = boost::spirit::qi;
typedef lex::lexertl::token< char const*, lex::omit, boost::mpl::true_ > token_type;
typedef lex::lexertl::lexer<token_type> lexer_type;
typedef interpreter::LexerTokens<lexer_type>::iterator_type iterator_type;
typedef qi::in_state_skipper<interpreter::LexerTokens<lexer_type>::lexer_def> skipper_type;
LexerTokens< lexer_type > lexer;
InterpreterGrammar< iterator_type, skipper_type > parser(lexer);
// read the file
if (argc != 2)
{
std::cout << "File required" << std::endl;
return 1;
}
std::ifstream t(argv[1]);
t.seekg(0, std::ios::end);
sourceCode.reserve(t.tellg());
t.seekg(0, std::ios::beg);
sourceCode.assign(std::istreambuf_iterator<char>(t),
std::istreambuf_iterator<char>());
char const* first = sourceCode.c_str();
char const* last = &first[sourceCode.size()];
bool r = lex::tokenize_and_phrase_parse(first, last, lexer, parser, qi::in_state("WHITESPACE")[lexer.self]);
std::cout << "Remaining " << std::string(first,last) << std::endl;
std::cout << "R is " << r << std::endl;
}
该语言的示例是:
def add(x,y) :
if (x <= 0) : return y fi
return 1 + add(x-1,y)
fed
y = add(5,4)
print y
我运行遇到的错误是调用语法时的解析器段错误。
我看到如果我
- 注释掉部分相关规则(funtionList)直到该部分
语法需要调用另一个规则(如 paramList)
- 并删除发送到 lexer/parser 中的部分源代码
只包括令牌部分,
语法不会出现段错误并正确解析表达式。
当我 运行 调试器中的代码时,我在
代码段错误,一个大的表达式被打印出来,所有的成员都有一个
字符串说,
error reading variable: Cannot access memory at address 0x7fffff7fefe0
我检查了其他类似的帖子,其中的错误是精神上的段错误,
然而,
- 我不认为这个错误是由于语法被递归,或者
规则中有临时语法,因为我写的 LL 解析器
之前在 lex 和 yacc 中成功地解析了语句,我
相信所有规则都将存在于整个 运行 程序中。
任何正确方向的观点,或对当前代码的批评都是
非常感谢。
如果你使用 AddressSanitizer,它会告诉你:
<start>...
<try>[]</try>...
ASAN:DEADLYSIGNAL...
=================================================================...
==8985==ERROR: AddressSanitizer: stack-overflow on address 0x7ffeb280dfc8 (pc 0x0000004c9cf6 bp 0x7f...
#0 0x4c9cf5 in __asan_memcpy (/home/sehe/Projects/Whosebug/sotest+0x4c9cf5)...
#1 0x68eb77 in bool boost::spirit::any_if<boost::spirit::traits::attribute_not_unused<boost::spi...
#2 0x68e844 in bool boost::spirit::qi::sequence_base<boost::spirit::qi::sequence<boost::fusion::...
#3 0x68e487 in bool boost::spirit::qi::sequence_base<boost::spirit::qi::sequence<boost::fusion::...
#4 0x68e190 in bool boost::spirit::qi::detail::alternative_function<boost::spirit::lex::lexertl:...
#5 0x68de4a in bool boost::spirit::qi::detail::alternative_function<boost::spirit::lex::lexertl:...
#6 0x68d8b5 in bool boost::spirit::qi::detail::alternative_function<boost::spirit::lex::lexertl:...
#7 0x6e085c in bool boost::fusion::detail::linear_any<boost::fusion::cons_iterator<boost::fusion...
#8 0x6e053f in bool boost::fusion::detail::any<boost::fusion::cons<boost::spirit::qi::sequence<b...
#9 0x6e0218 in bool boost::fusion::any<boost::fusion::cons<boost::spirit::qi::sequence<boost::fu...
#10 0x6dffc5 in bool boost::spirit::qi::alternative<boost::fusion::cons<boost::spirit::qi::seque...
#11 0x6dfbf7 in bool boost::spirit::qi::detail::parser_binder<boost::spirit::qi::alternative<boo...
#12 0x6de330 in boost::detail::function::function_obj_invoker4<boost::spirit::qi::detail::parser...
#13 0x5d633a in boost::function4<bool, boost::spirit::lex::lexertl::iterator<boost::spirit::lex:...
#14 0x5d58e8 in bool boost::spirit::qi::rule<boost::spirit::lex::lexertl::iterator<boost::spirit...
#15 0x5d54e9 in bool boost::spirit::qi::reference<boost::spirit::qi::rule<boost::spirit::lex::le...
#16 0x5d49bf in bool boost::spirit::qi::detail::fail_function<boost::spirit::lex::lexertl::itera...
#17 0x68f56c in bool boost::fusion::detail::linear_any<boost::fusion::cons_iterator<boost::fusio...
#18 0x68f267 in bool boost::fusion::detail::any<boost::fusion::cons<boost::spirit::qi::reference...
#19 0x68ef6e in bool boost::fusion::any<boost::fusion::cons<boost::spirit::qi::reference<boost::...
#20 0x68ebae in bool boost::spirit::any_if<boost::spirit::traits::attribute_not_unused<boost::sp...
#21 0x68e844 in bool boost::spirit::qi::sequence_base<boost::spirit::qi::sequence<boost::fusion:...
[ snip repeated frames ]
#250 0x68e487 in bool boost::spirit::qi::sequence_base<boost::spirit::qi::sequence<boost::fusion...
SUMMARY: AddressSanitizer: stack-overflow (/home/sehe/Projects/Whosebug/sotest+0x4c9cf5) in __a...
==8985==ABORTING...
所以,这显然是左递归导致堆栈溢出。
其他解析器生成器处理它的事实意义不大:Spirit 是一个 PEG 解析器生成器,左递归是不可能的。
你需要重写
exp %= exp >> qi::token(k_equalTo) >> exp
进入使 lhs 更具体的东西。
注意:我不得不修复一些与您呈现代码的方式有关的随机问题。这是我用来重现的:
#include <boost/spirit/include/lex.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
namespace lex = boost::spirit::lex;
namespace interpreter {
enum Tokens
{
k_andTok = 1,
k_def = 2,
k_elihw = 3,
k_elseTok = 4,
k_falseTok = 5,
k_fed = 6,
k_fi = 7,
k_ifTok = 8,
k_input = 9,
k_notTok = 10,
k_orTok = 11,
k_print = 12,
k_returnTok = 13,
k_trueTok = 14,
k_whileTok = 15,
k_plues = 16,
k_minus = 17,
k_mult = 18,
k_div = 19,
k_bang = 20,
k_equalTo = 21,
k_greaterEq = 22,
k_lessEq = 23,
k_notEq = 24,
k_less = 25,
k_greater = 26,
k_assign = 27,
k_comma = 28,
k_colon = 29,
k_leftParen = 30,
k_rightParen = 31,
k_leftBracket = 32,
k_rightBracket = 33,
k_nonTerminal = 34,
k_terminal = 35
};
template <typename Lexer>
struct LexerTokens : lex::lexer<Lexer>
{
LexerTokens() :
whiteSpace("[ \t\n]"),
andTok("and"),
def("def"),
elihw("elihw"),
elseTok("else"),
falseTok("false"),
fed("fed"),
fi("fi"),
ifTok("if"),
input("input"),
notTok("not"),
orTok("or"),
print("print"),
returnTok("return"),
trueTok("true"),
whileTok("while"),
plus("\+"),
minus("\-"),
mult("\*"),
div("\/"),
bang("\!"),
equalTo("=="),
greaterEq(">="),
lessEq("<="),
notEq("!="),
less("<"),
greater(">"),
assign("="),
comma(","),
colon(":"),
leftParen("\("),
rightParen("\)"),
leftBracket("\["),
rightBracket("\["),
nonTerminal("[a-z][a-zA-Z0-9]*"),
terminal("[0-9]")
{
this->self("WHITESPACE") = whiteSpace;
this->self.add
(andTok, k_andTok)
(def, k_def)
(elihw, k_elihw)
(elseTok, k_elseTok)
(falseTok, k_falseTok)
(fed, k_fed)
(fi, k_fi)
(ifTok, k_ifTok)
(andTok, k_andTok)
(input, k_input)
(notTok, k_notTok)
(orTok, k_orTok)
(print, k_print)
(returnTok, k_returnTok)
(trueTok, k_trueTok)
(whileTok, k_whileTok)
(plus, k_plues)
(minus, k_minus)
(mult, k_mult)
(div, k_div)
(bang, k_bang)
(equalTo, k_equalTo)
(greaterEq, k_greaterEq)
(lessEq, k_lessEq)
(notEq, k_notEq)
(less, k_less)
(greater, k_greater)
(assign, k_assign)
(comma, k_comma)
(colon, k_colon)
(leftParen, k_leftParen)
(rightParen, k_rightParen)
(leftBracket, k_leftBracket)
(rightBracket, k_rightBracket)
(nonTerminal, k_nonTerminal)
(terminal, k_terminal);
}
lex::token_def<lex::omit> whiteSpace;
lex::token_def<std::string> andTok;
lex::token_def<std::string> def;
lex::token_def<std::string> elihw;
lex::token_def<std::string> elseTok;
lex::token_def<std::string> falseTok;
lex::token_def<std::string> fed;
lex::token_def<std::string> fi;
lex::token_def<std::string> ifTok;
lex::token_def<std::string> input;
lex::token_def<std::string> notTok;
lex::token_def<std::string> orTok;
lex::token_def<std::string> print;
lex::token_def<std::string> returnTok;
lex::token_def<std::string> trueTok;
lex::token_def<std::string> whileTok;
lex::token_def<std::string> plus;
lex::token_def<std::string> minus;
lex::token_def<std::string> mult;
lex::token_def<std::string> div;
lex::token_def<std::string> bang;
lex::token_def<std::string> equalTo;
lex::token_def<std::string> greaterEq;
lex::token_def<std::string> lessEq;
lex::token_def<std::string> notEq;
lex::token_def<std::string> less;
lex::token_def<std::string> greater;
lex::token_def<std::string> assign;
lex::token_def<std::string> comma;
lex::token_def<std::string> colon;
lex::token_def<std::string> leftParen;
lex::token_def<std::string> rightParen;
lex::token_def<std::string> leftBracket;
lex::token_def<std::string> rightBracket;
lex::token_def<std::string> nonTerminal;
lex::token_def<std::string> terminal;
};
namespace qi = boost::spirit::qi;
template <typename Iterator, typename Skipper>
struct InterpreterGrammar : qi::grammar<Iterator, Skipper>
{
// using boost::phoenix::ref;
// using boost::phoenix::size;
template <typename TokenDef>
InterpreterGrammar(TokenDef const& )
: InterpreterGrammar::base_type(start)
//, connect(0)
{
start
= functionList >> endList >> qi::eoi
;
// different expressions
exp = exp >> qi::token(k_equalTo) >> exp
| exp >> qi::token(k_notEq) >> exp
| exp >> qi::token(k_less) >> exp
| exp >> qi::token(k_lessEq) >> exp
| exp >> qi::token(k_greater) >> exp
| exp >> qi::token(k_greaterEq) >> exp
| exp >> qi::token(k_andTok) >> exp
| exp >> qi::token(k_orTok) >> exp
| qi::token(k_notTok) >> exp
| exp >> qi::token(k_plues) >> exp
| exp >> qi::token(k_minus) >> exp
| exp >> qi::token(k_mult) >> exp
| qi::token(k_minus) >> exp
| qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
| qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket)
| qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
| qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
| qi::token(k_nonTerminal)
| qi::token(k_terminal)
| qi::token(k_trueTok)
| qi::token(k_falseTok)
;
// parameter list
paramList
= paramList >> qi::token(k_comma) >> exp
| exp
;
// return statements
returnStatement
= returnStatement >> exp
| returnStatement
;
// function call statements
callStatement
= qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
| qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> paramList >> qi::token(k_rightParen)
;
// variable assignment
assignmentStatement
= qi::token(k_nonTerminal) >> qi::token(k_assign) >> exp
| qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> exp
>> qi::token(k_rightBracket) >> qi::token(k_assign) >> exp
;
// list of integers
intList
= intList >> qi::token(k_comma) >> qi::token(k_terminal)
| qi::token(k_terminal)
;
// print out a variable
printStatement
= qi::token(k_print) >> exp
;
// take input
inputStatement
= qi::token(k_nonTerminal) >> qi::token(k_input)
;
// conditional statement
conditionStatement
= qi::token(k_ifTok) >> exp >> qi::token(k_colon) >> statements >> optionalElse
;
// consitions have optional else
optionalElse
= qi::token(k_elseTok) >> qi::token(k_colon) >> statements
| qi::eps
;
// while loop
whileStatement
= qi::token(k_whileTok) >> exp >> qi::token(k_colon) >> statements >> qi::token(k_elihw)
;
// actual program statements
endList
= endList >> end
| end
;
// end possibilities of program in global space
end = callStatement
| printStatement
| qi::token(k_nonTerminal) >> qi::token(k_assign) >> qi::token(k_input)
| qi::token(k_nonTerminal) >> qi::token(k_assign) >> exp
| qi::token(k_nonTerminal) >> qi::token(k_assign) >> qi::token(k_leftBracket) >> intList
>> qi::token(k_rightBracket)
| qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket)
>> qi::token(k_assign) >> exp
;
// function parameters
paramList
= paramList >> qi::token(k_comma) >> qi::token(k_nonTerminal)
| qi::token(k_nonTerminal)
| qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> qi::token(k_rightBracket)
;
// define a statement as assignment print input condition while or call
statement
= assignmentStatement
| printStatement
| inputStatement
| conditionStatement
| whileStatement
| callStatement
| returnStatement
;
// general statement list
statements
= statements >> statement
| statement
;
// functions
functionList
= qi::token(k_def) >> qi::token(k_nonTerminal) >> qi::token(k_leftParen)
>> paramList >> qi::token(k_rightParen) >> qi::token(k_colon)
>> statements >> qi::token(k_fed)
| qi::token(k_def) >> qi::token(k_nonTerminal) >> qi::token(k_leftParen)
>> qi::token(k_rightParen) >> qi::token(k_colon) >> statements >> qi::token(k_fed)
| qi::eps
;
BOOST_SPIRIT_DEBUG_NODES((start)(functionList));
}
qi::rule<Iterator, Skipper> start;
qi::rule<Iterator, Skipper> functionList;
qi::rule<Iterator, Skipper> endList;
qi::rule<Iterator, Skipper> paramList;
qi::rule<Iterator, Skipper> statements;
qi::rule<Iterator, Skipper> statement;
qi::rule<Iterator, Skipper> assignmentStatement;
qi::rule<Iterator, Skipper> printStatement;
qi::rule<Iterator, Skipper> inputStatement;
qi::rule<Iterator, Skipper> conditionStatement;
qi::rule<Iterator, Skipper> whileStatement;
qi::rule<Iterator, Skipper> callStatement;
qi::rule<Iterator, Skipper> returnStatement;
qi::rule<Iterator, Skipper> exp;
qi::rule<Iterator, Skipper> intList;
qi::rule<Iterator, Skipper> optionalElse;
qi::rule<Iterator, Skipper> end;
};
}
#include <fstream>
#include <iterator>
int main(int argc, char** argv) {
namespace lex = boost::spirit::lex;
namespace qi = boost::spirit::qi;
typedef lex::lexertl::token< char const*, lex::omit, boost::mpl::true_ > token_type;
typedef lex::lexertl::lexer<token_type> lexer_type;
typedef interpreter::LexerTokens<lexer_type>::iterator_type iterator_type;
typedef qi::in_state_skipper<interpreter::LexerTokens<lexer_type>::lexer_def> skipper_type;
interpreter::LexerTokens< lexer_type > lexer;
interpreter::InterpreterGrammar< iterator_type, skipper_type > parser(lexer);
// read the file
if (argc != 2)
{
std::cout << "File required" << std::endl;
return 1;
}
std::ifstream t(argv[1]);
std::string const sourceCode { std::istreambuf_iterator<char>(t), {} };
char const* first = sourceCode.data();
char const* last = first + sourceCode.size();
bool r = lex::tokenize_and_phrase_parse(first, last, lexer, parser, qi::in_state("WHITESPACE")[lexer.self]);
std::cout << "Remaining " << std::string(first,last) << std::endl;
std::cout << "R is " << r << std::endl;
}
我一直在尝试将我在本科编译器课程中编写的一些 lex 和 yacc 代码转换为精神代码以学习精神,但我发现了一个我似乎无法弄清楚的段错误。我这样写词法分析器:
namespace lex = boost::spirit::lex;
enum Tokens
{
k_andTok = 1,
k_def = 2,
k_elihw = 3,
k_elseTok = 4,
k_falseTok = 5,
k_fed = 6,
k_fi = 7,
k_ifTok = 8,
k_input = 9,
k_notTok = 10,
k_orTok = 11,
k_print = 12,
k_returnTok = 13,
k_trueTok = 14,
k_whileTok = 15,
k_plues = 16,
k_minus = 17,
k_mult = 18,
k_div = 19,
k_bang = 20,
k_equalTo = 21,
k_greaterEq = 22,
k_lessEq = 23,
k_notEq = 24,
k_less = 25,
k_greater = 26,
k_assign = 27,
k_comma = 28,
k_colon = 29,
k_leftParen = 30,
k_rightParen = 31,
k_leftBracket = 32,
k_rightBracket = 33,
k_nonTerminal = 34,
k_terminal = 35
};
template <typename Lexer>
struct LexerTokens : lex::lexer<Lexer>
{
LexerTokens() :
whiteSpace("[ \t\n]"),
andTok("and"),
def("def"),
elihw("elihw"),
elseTok("else"),
falseTok("false"),
fed("fed"),
fi("fi"),
ifTok("if"),
input("input"),
notTok("not"),
orTok("or"),
print("print"),
returnTok("return"),
trueTok("true"),
whileTok("while"),
plus("\+"),
minus("\-"),
mult("\*"),
div("\/"),
bang("\!"),
equalTo("=="),
greaterEq(">="),
lessEq("<="),
notEq("!="),
less("<"),
greater(">"),
assign("="),
comma(","),
colon(":"),
leftParen("\("),
rightParen("\)"),
leftBracket("\["),
rightBracket("\["),
nonTerminal("[a-z][a-zA-Z0-9]*"),
terminal("[0-9]")
{
this->self("WHITESPACE") = whiteSpace;
this->self.add
(andTok, k_andTok)
(def, k_def)
(elihw, k_elihw)
(elseTok, k_elseTok)
(falseTok, k_falseTok)
(fed, k_fed)
(fi, k_fi)
(ifTok, k_ifTok)
(andTok, k_andTok)
(input, k_input)
(notTok, k_notTok)
(orTok, k_orTok)
(print, k_print)
(returnTok, k_returnTok)
(trueTok, k_trueTok)
(whileTok, k_whileTok)
(plus, k_plues)
(minus, k_minus)
(mult, k_mult)
(div, k_div)
(bang, k_bang)
(equalTo, k_equalTo)
(greaterEq, k_greaterEq)
(lessEq, k_lessEq)
(notEq, k_notEq)
(less, k_less)
(greater, k_greater)
(assign, k_assign)
(comma, k_comma)
(colon, k_colon)
(leftParen, k_leftParen)
(rightParen, k_rightParen)
(leftBracket, k_leftBracket)
(rightBracket, k_rightBracket)
(nonTerminal, k_nonTerminal)
(terminal, k_terminal);
}
lex::token_def<lex::omit> whiteSpace;
lex::token_def<std::string> andTok;
lex::token_def<std::string> def;
lex::token_def<std::string> elihw;
lex::token_def<std::string> elseTok;
lex::token_def<std::string> falseTok;
lex::token_def<std::string> fed;
lex::token_def<std::string> fi;
lex::token_def<std::string> ifTok;
lex::token_def<std::string> input;
lex::token_def<std::string> notTok;
lex::token_def<std::string> orTok;
lex::token_def<std::string> print;
lex::token_def<std::string> returnTok;
lex::token_def<std::string> trueTok;
lex::token_def<std::string> whileTok;
lex::token_def<std::string> plus;
lex::token_def<std::string> minus;
lex::token_def<std::string> mult;
lex::token_def<std::string> div;
lex::token_def<std::string> bang;
lex::token_def<std::string> equalTo;
lex::token_def<std::string> greaterEq;
lex::token_def<std::string> lessEq;
lex::token_def<std::string> notEq;
lex::token_def<std::string> less;
lex::token_def<std::string> greater;
lex::token_def<std::string> assign;
lex::token_def<std::string> comma;
lex::token_def<std::string> colon;
lex::token_def<std::string> leftParen;
lex::token_def<std::string> rightParen;
lex::token_def<std::string> leftBracket;
lex::token_def<std::string> rightBracket;
lex::token_def<std::string> nonTerminal;
lex::token_def<std::string> terminal;
};
和解析器
namespace qi = boost::spirit::qi;
template <typename Iterator, typename Skipper>
struct InterpreterGrammar : qi::grammar<Iterator, Skipper>
{
// using boost::phoenix::ref;
// using boost::phoenix::size;
template <typename TokenDef>
InterpreterGrammar(TokenDef const& tok)
: InterpreterGrammar::base_type(start),
connect(0)
{
start %= functionList >> endList >> qi::eoi;
// different expressions
exp %= exp >> qi::token(k_equalTo) >> exp
|
exp >> qi::token(k_notEq) >> exp
|
exp >> qi::token(k_less) >> exp
|
exp >> qi::token(k_lessEq) >> exp
|
exp >> qi::token(k_greater) >> exp
|
exp >> qi::token(k_greaterEq) >> exp
|
exp >> qi::token(k_andTok) >> exp
|
exp >> qi::token(k_orTok) >> exp
|
qi::token(k_notTok) >> exp
|
exp >> qi::token(k_plues) >> exp
|
exp >> qi::token(k_minus) >> exp
|
exp >> qi::token(k_mult) >> exp
|
qi::token(k_minus) >> exp
|
qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
|
qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket)
|
qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
|
qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
|
qi::token(k_nonTerminal)
|
qi::token(k_terminal)
|
qi::token(k_trueTok)
|
qi::token(k_falseTok);
// parameter list
paramList %= paramList >> qi::token(k_comma) >> exp
|
exp;
// return statements
returnStatement %= returnStatement >> exp
|
returnStatement;
// function call statements
callStatement %= qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
|
qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> paramList >> qi::token(k_rightParen);
// variable assignment
assignmentStatement %= qi::token(k_nonTerminal) >> qi::token(k_assign) >> exp
|
qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> exp
>> qi::token(k_rightBracket) >> qi::token(k_assign) >> exp;
// list of integers
intList %= intList >> qi::token(k_comma) >> qi::token(k_terminal)
|
qi::token(k_terminal);
// print out a variable
printStatement %= qi::token(k_print) >> exp;
// take input
inputStatement %= qi::token(k_nonTerminal) >> qi::token(k_input);
// conditional statement
conditionStatement %= qi::token(k_ifTok) >> exp >> qi::token(k_colon) >> statements >> optionalElse;
// consitions have optional else
optionalElse %= qi::token(k_elseTok) >> qi::token(k_colon) >> statements
|
qi::eps;
// while loop
whileStatement %= qi::token(k_whileTok) >> exp >> qi::token(k_colon) >> statements >> qi::token(k_elihw);
// actual program statements
endList %= endList >> end
|
end;
// end possibilities of program in global space
end %= callStatement
|
printStatement
|
qi::token(k_nonTerminal) >> qi::token(k_assign) >> qi::token(k_input)
|
qi::token(k_nonTerminal) >> qi::token(k_assign) >> exp
|
qi::token(k_nonTerminal) >> qi::token(k_assign) >> qi::token(k_leftBracket) >> intList
>> qi::token(k_rightBracket)
|
qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket)
>> qi::token(k_assign) >> exp;
// function parameters
paramList %= paramList >> qi::token(k_comma) >> qi::token(k_nonTerminal)
|
qi::token(k_nonTerminal)
|
qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> qi::token(k_rightBracket);
// define a statement as assignment print input condition while or call
statement %= assignmentStatement
|
printStatement
|
inputStatement
|
conditionStatement
|
whileStatement
|
callStatement
|
returnStatement;
// general statement list
statements %= statements >> statement
|
statement;
// functions
functionList %= qi::token(k_def) >> qi::token(k_nonTerminal) >> qi::token(k_leftParen)
>> paramList >> qi::token(k_rightParen) >> qi::token(k_colon)
>> statements >> qi::token(k_fed)
|
qi::token(k_def) >> qi::token(k_nonTerminal) >> qi::token(k_leftParen)
>> qi::token(k_rightParen) >> qi::token(k_colon) >> statements >> qi::token(k_fed);
| qi::eps;
BOOST_SPIRIT_DEBUG_NODES((start)(functionList));
debug(start);
}
qi::rule<Iterator, Skipper> start;
qi::rule<Iterator, Skipper> functionList;
qi::rule<Iterator, Skipper> endList;
qi::rule<Iterator, Skipper> paramList;
qi::rule<Iterator, Skipper> statements;
qi::rule<Iterator, Skipper> statement;
qi::rule<Iterator, Skipper> assignmentStatement;
qi::rule<Iterator, Skipper> printStatement;
qi::rule<Iterator, Skipper> inputStatement;
qi::rule<Iterator, Skipper> conditionStatement;
qi::rule<Iterator, Skipper> whileStatement;
qi::rule<Iterator, Skipper> callStatement;
qi::rule<Iterator, Skipper> returnStatement;
qi::rule<Iterator, Skipper> exp;
qi::rule<Iterator, Skipper> intList;
qi::rule<Iterator, Skipper> optionalElse;
qi::rule<Iterator, Skipper> end;
};
和主要部分
int main(int argc, char** argv)
{
namespace lex = boost::spirit::lex;
namespace qi = boost::spirit::qi;
typedef lex::lexertl::token< char const*, lex::omit, boost::mpl::true_ > token_type;
typedef lex::lexertl::lexer<token_type> lexer_type;
typedef interpreter::LexerTokens<lexer_type>::iterator_type iterator_type;
typedef qi::in_state_skipper<interpreter::LexerTokens<lexer_type>::lexer_def> skipper_type;
LexerTokens< lexer_type > lexer;
InterpreterGrammar< iterator_type, skipper_type > parser(lexer);
// read the file
if (argc != 2)
{
std::cout << "File required" << std::endl;
return 1;
}
std::ifstream t(argv[1]);
t.seekg(0, std::ios::end);
sourceCode.reserve(t.tellg());
t.seekg(0, std::ios::beg);
sourceCode.assign(std::istreambuf_iterator<char>(t),
std::istreambuf_iterator<char>());
char const* first = sourceCode.c_str();
char const* last = &first[sourceCode.size()];
bool r = lex::tokenize_and_phrase_parse(first, last, lexer, parser, qi::in_state("WHITESPACE")[lexer.self]);
std::cout << "Remaining " << std::string(first,last) << std::endl;
std::cout << "R is " << r << std::endl;
}
该语言的示例是:
def add(x,y) :
if (x <= 0) : return y fi
return 1 + add(x-1,y)
fed
y = add(5,4)
print y
我运行遇到的错误是调用语法时的解析器段错误。
我看到如果我
- 注释掉部分相关规则(funtionList)直到该部分 语法需要调用另一个规则(如 paramList)
- 并删除发送到 lexer/parser 中的部分源代码 只包括令牌部分,
语法不会出现段错误并正确解析表达式。
当我 运行 调试器中的代码时,我在 代码段错误,一个大的表达式被打印出来,所有的成员都有一个 字符串说,
error reading variable: Cannot access memory at address 0x7fffff7fefe0
我检查了其他类似的帖子,其中的错误是精神上的段错误, 然而,
- 我不认为这个错误是由于语法被递归,或者 规则中有临时语法,因为我写的 LL 解析器 之前在 lex 和 yacc 中成功地解析了语句,我 相信所有规则都将存在于整个 运行 程序中。
任何正确方向的观点,或对当前代码的批评都是 非常感谢。
如果你使用 AddressSanitizer,它会告诉你:
<start>...
<try>[]</try>...
ASAN:DEADLYSIGNAL...
=================================================================...
==8985==ERROR: AddressSanitizer: stack-overflow on address 0x7ffeb280dfc8 (pc 0x0000004c9cf6 bp 0x7f...
#0 0x4c9cf5 in __asan_memcpy (/home/sehe/Projects/Whosebug/sotest+0x4c9cf5)...
#1 0x68eb77 in bool boost::spirit::any_if<boost::spirit::traits::attribute_not_unused<boost::spi...
#2 0x68e844 in bool boost::spirit::qi::sequence_base<boost::spirit::qi::sequence<boost::fusion::...
#3 0x68e487 in bool boost::spirit::qi::sequence_base<boost::spirit::qi::sequence<boost::fusion::...
#4 0x68e190 in bool boost::spirit::qi::detail::alternative_function<boost::spirit::lex::lexertl:...
#5 0x68de4a in bool boost::spirit::qi::detail::alternative_function<boost::spirit::lex::lexertl:...
#6 0x68d8b5 in bool boost::spirit::qi::detail::alternative_function<boost::spirit::lex::lexertl:...
#7 0x6e085c in bool boost::fusion::detail::linear_any<boost::fusion::cons_iterator<boost::fusion...
#8 0x6e053f in bool boost::fusion::detail::any<boost::fusion::cons<boost::spirit::qi::sequence<b...
#9 0x6e0218 in bool boost::fusion::any<boost::fusion::cons<boost::spirit::qi::sequence<boost::fu...
#10 0x6dffc5 in bool boost::spirit::qi::alternative<boost::fusion::cons<boost::spirit::qi::seque...
#11 0x6dfbf7 in bool boost::spirit::qi::detail::parser_binder<boost::spirit::qi::alternative<boo...
#12 0x6de330 in boost::detail::function::function_obj_invoker4<boost::spirit::qi::detail::parser...
#13 0x5d633a in boost::function4<bool, boost::spirit::lex::lexertl::iterator<boost::spirit::lex:...
#14 0x5d58e8 in bool boost::spirit::qi::rule<boost::spirit::lex::lexertl::iterator<boost::spirit...
#15 0x5d54e9 in bool boost::spirit::qi::reference<boost::spirit::qi::rule<boost::spirit::lex::le...
#16 0x5d49bf in bool boost::spirit::qi::detail::fail_function<boost::spirit::lex::lexertl::itera...
#17 0x68f56c in bool boost::fusion::detail::linear_any<boost::fusion::cons_iterator<boost::fusio...
#18 0x68f267 in bool boost::fusion::detail::any<boost::fusion::cons<boost::spirit::qi::reference...
#19 0x68ef6e in bool boost::fusion::any<boost::fusion::cons<boost::spirit::qi::reference<boost::...
#20 0x68ebae in bool boost::spirit::any_if<boost::spirit::traits::attribute_not_unused<boost::sp...
#21 0x68e844 in bool boost::spirit::qi::sequence_base<boost::spirit::qi::sequence<boost::fusion:...
[ snip repeated frames ]
#250 0x68e487 in bool boost::spirit::qi::sequence_base<boost::spirit::qi::sequence<boost::fusion...
SUMMARY: AddressSanitizer: stack-overflow (/home/sehe/Projects/Whosebug/sotest+0x4c9cf5) in __a...
==8985==ABORTING...
所以,这显然是左递归导致堆栈溢出。
其他解析器生成器处理它的事实意义不大:Spirit 是一个 PEG 解析器生成器,左递归是不可能的。
你需要重写
exp %= exp >> qi::token(k_equalTo) >> exp
进入使 lhs 更具体的东西。
注意:我不得不修复一些与您呈现代码的方式有关的随机问题。这是我用来重现的:
#include <boost/spirit/include/lex.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
namespace lex = boost::spirit::lex;
namespace interpreter {
enum Tokens
{
k_andTok = 1,
k_def = 2,
k_elihw = 3,
k_elseTok = 4,
k_falseTok = 5,
k_fed = 6,
k_fi = 7,
k_ifTok = 8,
k_input = 9,
k_notTok = 10,
k_orTok = 11,
k_print = 12,
k_returnTok = 13,
k_trueTok = 14,
k_whileTok = 15,
k_plues = 16,
k_minus = 17,
k_mult = 18,
k_div = 19,
k_bang = 20,
k_equalTo = 21,
k_greaterEq = 22,
k_lessEq = 23,
k_notEq = 24,
k_less = 25,
k_greater = 26,
k_assign = 27,
k_comma = 28,
k_colon = 29,
k_leftParen = 30,
k_rightParen = 31,
k_leftBracket = 32,
k_rightBracket = 33,
k_nonTerminal = 34,
k_terminal = 35
};
template <typename Lexer>
struct LexerTokens : lex::lexer<Lexer>
{
LexerTokens() :
whiteSpace("[ \t\n]"),
andTok("and"),
def("def"),
elihw("elihw"),
elseTok("else"),
falseTok("false"),
fed("fed"),
fi("fi"),
ifTok("if"),
input("input"),
notTok("not"),
orTok("or"),
print("print"),
returnTok("return"),
trueTok("true"),
whileTok("while"),
plus("\+"),
minus("\-"),
mult("\*"),
div("\/"),
bang("\!"),
equalTo("=="),
greaterEq(">="),
lessEq("<="),
notEq("!="),
less("<"),
greater(">"),
assign("="),
comma(","),
colon(":"),
leftParen("\("),
rightParen("\)"),
leftBracket("\["),
rightBracket("\["),
nonTerminal("[a-z][a-zA-Z0-9]*"),
terminal("[0-9]")
{
this->self("WHITESPACE") = whiteSpace;
this->self.add
(andTok, k_andTok)
(def, k_def)
(elihw, k_elihw)
(elseTok, k_elseTok)
(falseTok, k_falseTok)
(fed, k_fed)
(fi, k_fi)
(ifTok, k_ifTok)
(andTok, k_andTok)
(input, k_input)
(notTok, k_notTok)
(orTok, k_orTok)
(print, k_print)
(returnTok, k_returnTok)
(trueTok, k_trueTok)
(whileTok, k_whileTok)
(plus, k_plues)
(minus, k_minus)
(mult, k_mult)
(div, k_div)
(bang, k_bang)
(equalTo, k_equalTo)
(greaterEq, k_greaterEq)
(lessEq, k_lessEq)
(notEq, k_notEq)
(less, k_less)
(greater, k_greater)
(assign, k_assign)
(comma, k_comma)
(colon, k_colon)
(leftParen, k_leftParen)
(rightParen, k_rightParen)
(leftBracket, k_leftBracket)
(rightBracket, k_rightBracket)
(nonTerminal, k_nonTerminal)
(terminal, k_terminal);
}
lex::token_def<lex::omit> whiteSpace;
lex::token_def<std::string> andTok;
lex::token_def<std::string> def;
lex::token_def<std::string> elihw;
lex::token_def<std::string> elseTok;
lex::token_def<std::string> falseTok;
lex::token_def<std::string> fed;
lex::token_def<std::string> fi;
lex::token_def<std::string> ifTok;
lex::token_def<std::string> input;
lex::token_def<std::string> notTok;
lex::token_def<std::string> orTok;
lex::token_def<std::string> print;
lex::token_def<std::string> returnTok;
lex::token_def<std::string> trueTok;
lex::token_def<std::string> whileTok;
lex::token_def<std::string> plus;
lex::token_def<std::string> minus;
lex::token_def<std::string> mult;
lex::token_def<std::string> div;
lex::token_def<std::string> bang;
lex::token_def<std::string> equalTo;
lex::token_def<std::string> greaterEq;
lex::token_def<std::string> lessEq;
lex::token_def<std::string> notEq;
lex::token_def<std::string> less;
lex::token_def<std::string> greater;
lex::token_def<std::string> assign;
lex::token_def<std::string> comma;
lex::token_def<std::string> colon;
lex::token_def<std::string> leftParen;
lex::token_def<std::string> rightParen;
lex::token_def<std::string> leftBracket;
lex::token_def<std::string> rightBracket;
lex::token_def<std::string> nonTerminal;
lex::token_def<std::string> terminal;
};
namespace qi = boost::spirit::qi;
template <typename Iterator, typename Skipper>
struct InterpreterGrammar : qi::grammar<Iterator, Skipper>
{
// using boost::phoenix::ref;
// using boost::phoenix::size;
template <typename TokenDef>
InterpreterGrammar(TokenDef const& )
: InterpreterGrammar::base_type(start)
//, connect(0)
{
start
= functionList >> endList >> qi::eoi
;
// different expressions
exp = exp >> qi::token(k_equalTo) >> exp
| exp >> qi::token(k_notEq) >> exp
| exp >> qi::token(k_less) >> exp
| exp >> qi::token(k_lessEq) >> exp
| exp >> qi::token(k_greater) >> exp
| exp >> qi::token(k_greaterEq) >> exp
| exp >> qi::token(k_andTok) >> exp
| exp >> qi::token(k_orTok) >> exp
| qi::token(k_notTok) >> exp
| exp >> qi::token(k_plues) >> exp
| exp >> qi::token(k_minus) >> exp
| exp >> qi::token(k_mult) >> exp
| qi::token(k_minus) >> exp
| qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
| qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket)
| qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
| qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
| qi::token(k_nonTerminal)
| qi::token(k_terminal)
| qi::token(k_trueTok)
| qi::token(k_falseTok)
;
// parameter list
paramList
= paramList >> qi::token(k_comma) >> exp
| exp
;
// return statements
returnStatement
= returnStatement >> exp
| returnStatement
;
// function call statements
callStatement
= qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
| qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> paramList >> qi::token(k_rightParen)
;
// variable assignment
assignmentStatement
= qi::token(k_nonTerminal) >> qi::token(k_assign) >> exp
| qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> exp
>> qi::token(k_rightBracket) >> qi::token(k_assign) >> exp
;
// list of integers
intList
= intList >> qi::token(k_comma) >> qi::token(k_terminal)
| qi::token(k_terminal)
;
// print out a variable
printStatement
= qi::token(k_print) >> exp
;
// take input
inputStatement
= qi::token(k_nonTerminal) >> qi::token(k_input)
;
// conditional statement
conditionStatement
= qi::token(k_ifTok) >> exp >> qi::token(k_colon) >> statements >> optionalElse
;
// consitions have optional else
optionalElse
= qi::token(k_elseTok) >> qi::token(k_colon) >> statements
| qi::eps
;
// while loop
whileStatement
= qi::token(k_whileTok) >> exp >> qi::token(k_colon) >> statements >> qi::token(k_elihw)
;
// actual program statements
endList
= endList >> end
| end
;
// end possibilities of program in global space
end = callStatement
| printStatement
| qi::token(k_nonTerminal) >> qi::token(k_assign) >> qi::token(k_input)
| qi::token(k_nonTerminal) >> qi::token(k_assign) >> exp
| qi::token(k_nonTerminal) >> qi::token(k_assign) >> qi::token(k_leftBracket) >> intList
>> qi::token(k_rightBracket)
| qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket)
>> qi::token(k_assign) >> exp
;
// function parameters
paramList
= paramList >> qi::token(k_comma) >> qi::token(k_nonTerminal)
| qi::token(k_nonTerminal)
| qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> qi::token(k_rightBracket)
;
// define a statement as assignment print input condition while or call
statement
= assignmentStatement
| printStatement
| inputStatement
| conditionStatement
| whileStatement
| callStatement
| returnStatement
;
// general statement list
statements
= statements >> statement
| statement
;
// functions
functionList
= qi::token(k_def) >> qi::token(k_nonTerminal) >> qi::token(k_leftParen)
>> paramList >> qi::token(k_rightParen) >> qi::token(k_colon)
>> statements >> qi::token(k_fed)
| qi::token(k_def) >> qi::token(k_nonTerminal) >> qi::token(k_leftParen)
>> qi::token(k_rightParen) >> qi::token(k_colon) >> statements >> qi::token(k_fed)
| qi::eps
;
BOOST_SPIRIT_DEBUG_NODES((start)(functionList));
}
qi::rule<Iterator, Skipper> start;
qi::rule<Iterator, Skipper> functionList;
qi::rule<Iterator, Skipper> endList;
qi::rule<Iterator, Skipper> paramList;
qi::rule<Iterator, Skipper> statements;
qi::rule<Iterator, Skipper> statement;
qi::rule<Iterator, Skipper> assignmentStatement;
qi::rule<Iterator, Skipper> printStatement;
qi::rule<Iterator, Skipper> inputStatement;
qi::rule<Iterator, Skipper> conditionStatement;
qi::rule<Iterator, Skipper> whileStatement;
qi::rule<Iterator, Skipper> callStatement;
qi::rule<Iterator, Skipper> returnStatement;
qi::rule<Iterator, Skipper> exp;
qi::rule<Iterator, Skipper> intList;
qi::rule<Iterator, Skipper> optionalElse;
qi::rule<Iterator, Skipper> end;
};
}
#include <fstream>
#include <iterator>
int main(int argc, char** argv) {
namespace lex = boost::spirit::lex;
namespace qi = boost::spirit::qi;
typedef lex::lexertl::token< char const*, lex::omit, boost::mpl::true_ > token_type;
typedef lex::lexertl::lexer<token_type> lexer_type;
typedef interpreter::LexerTokens<lexer_type>::iterator_type iterator_type;
typedef qi::in_state_skipper<interpreter::LexerTokens<lexer_type>::lexer_def> skipper_type;
interpreter::LexerTokens< lexer_type > lexer;
interpreter::InterpreterGrammar< iterator_type, skipper_type > parser(lexer);
// read the file
if (argc != 2)
{
std::cout << "File required" << std::endl;
return 1;
}
std::ifstream t(argv[1]);
std::string const sourceCode { std::istreambuf_iterator<char>(t), {} };
char const* first = sourceCode.data();
char const* last = first + sourceCode.size();
bool r = lex::tokenize_and_phrase_parse(first, last, lexer, parser, qi::in_state("WHITESPACE")[lexer.self]);
std::cout << "Remaining " << std::string(first,last) << std::endl;
std::cout << "R is " << r << std::endl;
}