为什么我不能在 flex/bison 中声明新令牌?
Why can't I declare new tokens in flex/bison?
我刚刚向我的解析器添加了一组新标记,但每个新标记都被报告为未声明。第一行的标记在上一个能正常工作的版本中就已经存在。
%token <token> NUMCONST STRINGCONST IDENT CHARCONST BOOLCONST
%token <token> BEGIN END IF THEN ELSE WHILE DO FOR TO BY RETURN BREAK OR AND NOT STATIC BOOL CHAR INT
%token <token> DPLUS DMINUS LASSIGN PLUSEQ MINUSEQ TIMEEQ DIVEQ NOTEQ
运行 makefile 后收到的错误消息表明,没有任何一个新标记被正确声明,尽管所有旧标记仍然可以正常使用。
cScan.l:44:9: error: ‘STATIC’ undeclared (first use in this function)
static {return STATIC;}
^
cScan.l:44:9: note: each undeclared identifier is reported only once for each function it appears in
cScan.l:45:9: error: ‘BOOL’ undeclared (first use in this function)
bool {return BOOL;}
^
cScan.l:46:9: error: ‘CHAR’ undeclared (first use in this function)
char {return CHAR;}
^
cScan.l:47:10: error: ‘INT’ undeclared (first use in this function)
int { return INT; }
^
cScan.l:48:15: error: expected expression before ‘;’ token
begin { return BEGIN;}
^
cScan.l:49:9: error: ‘END’ undeclared (first use in this function)
end {return END;}
^
cScan.l:50:9: error: ‘IF’ undeclared (first use in this function)
if {return IF;}
^
cScan.l:51:9: error: ‘THEN’ undeclared (first use in this function)
then {return THEN;}
^
cScan.l:52:9: error: ‘ELSE’ undeclared (first use in this function)
else {return ELSE;}
^
cScan.l:53:9: error: ‘WHILE’ undeclared (first use in this function)
while {return WHILE;}
^
cScan.l:54:9: error: ‘DO’ undeclared (first use in this function)
do {return DO;}
^
cScan.l:55:9: error: ‘FOR’ undeclared (first use in this function)
for {return FOR;}
^
cScan.l:56:9: error: ‘TO’ undeclared (first use in this function)
to {return TO;}
^
cScan.l:57:9: error: ‘BY’ undeclared (first use in this function)
by {return BY;}
^
cScan.l:58:9: error: ‘RETURN’ undeclared (first use in this function)
return {return RETURN;}
^
cScan.l:59:9: error: ‘BREAK’ undeclared (first use in this function)
break {return BREAK;}
^
cScan.l:60:9: error: ‘OR’ undeclared (first use in this function)
or {return OR;}
^
cScan.l:61:9: error: ‘AND’ undeclared (first use in this function)
and {return AND;}
^
cScan.l:62:10: error: ‘NOT’ undeclared (first use in this function)
not { return NOT;}
^
cScan.l:64:10: error: ‘DPLUS’ undeclared (first use in this function)
"++" { return DPLUS; }
^
cScan.l:65:10: error: ‘DMINUS’ undeclared (first use in this function)
"--" { return DMINUS; }
^
cScan.l:66:10: error: ‘LASSIGN’ undeclared (first use in this function)
"<-" { return LASSIGN; }
^
cScan.l:67:10: error: ‘PLUSEQ’ undeclared (first use in this function)
"+=" { return PLUSEQ; }
^
cScan.l:68:10: error: ‘MINUSEQ’ undeclared (first use in this function)
"-=" { return MINUSEQ; }
^
cScan.l:69:10: error: ‘TIMEEQ’ undeclared (first use in this function)
"*=" { return TIMEEQ; }
^
cScan.l:70:10: error: ‘DIVEQ’ undeclared (first use in this function)
"/=" { return DIVEQ; }
^
cScan.l:71:10: error: ‘NOTEQ’ undeclared (first use in this function)
"!=" { return NOTEQ; }
这是返回各个标记的 flex 文件:
%{
/*
* cScan.l
*/
#include "scanType.h"
#include "cScan.tab.h"
%}
%option yylineno
LETTER [A-Za-z]
ID {LETTER}[_A-Za-z0-9]*
NUMCONST [0-9]+
STRINGCONST \"([^\\"]|\.)*\"
CHARCONST '\?.'
BOOLCONST true|false
%%
{BOOLCONST} {
struct TokenData boolToken;
yylval.token = &boolToken;
yylval.token->tokenclass = 5;
yylval.token->linenum = yylineno;
yylval.token->tokenstr = yytext;
if(yytext[0] == 't') {
yylval.token->nvalue = 1;
} else {
yylval.token->nvalue = 0;
}
return BOOLCONST;
}
static { return STATIC; }
bool { return BOOL; }
char { return CHAR; }
int { return INT; }
begin { return BEGIN;}
end { return END;}
if { return IF;}
then { return THEN;}
else { return ELSE;}
while { return WHILE;}
do { return DO;}
for { return FOR;}
to { return TO;}
by { return BY;}
return { return RETURN;}
break { return BREAK;}
or { return OR; }
and { return AND; }
not { return NOT;}
"++" { return DPLUS; }
"--" { return DMINUS; }
"<-" { return LASSIGN; }
"+=" { return PLUSEQ; }
"-=" { return MINUSEQ; }
"*=" { return TIMEEQ; }
"/=" { return DIVEQ; }
"!=" { return NOTEQ; }
{ID} {
struct TokenData idToken;
yylval.token = &idToken;
yylval.token->tokenclass = 1;
yylval.token->linenum = yylineno;
yylval.token->tokenstr = yytext;
yylval.token->svalue = yytext;
return IDENT;
}
{NUMCONST} {
struct TokenData numToken;
yylval.token = &numToken;
yylval.token->tokenclass = 2;
yylval.token->linenum = yylineno;
yylval.token->nvalue = atoi(yytext);
yylval.token->tokenstr = yytext;
return NUMCONST;
}
{STRINGCONST} {
struct TokenData stringToken;
yylval.token = &stringToken;
yylval.token->tokenclass = 3;
yylval.token->linenum = yylineno;
yylval.token->tokenstr = yytext;
yylval.token->svalue = yytext;
yylval.token->nvalue = yyleng-2;
return STRINGCONST;
}
{CHARCONST} {
struct TokenData charToken;
yylval.token = &charToken;
yylval.token->tokenclass = 4;
yylval.token->linenum = yylineno;
yylval.token->tokenstr = yytext;
yylval.token->svalue = yytext;
return CHARCONST;
}
"="|"<"|">"|"+"|"-"|"*"|"/"|"%"|"["|"]"|"*"|"-"|"?"|"("|")"|";"|","|":" { return yytext[0]; }
[ \t\r] ;
##.*\n ;
\n { ; /*option to add stuff*/ }
. { printf("ERROR(%d): Invalid or misplaced input character: '%c'. Character Ignored.\n", yylineno, yytext[0]); }
%%
/*
 * Called by the scanner when the end of an input file is encountered.
 * Returning 1 reports that there is no further input to continue with,
 * so scanning stops.
 */
int yywrap(void) {
    return 1;
}
这些标记全部列在 cScan.tab.h 文件中,而该文件已被 cScan.l 包含。以下是它们的定义。
/* Token type. */
#ifndef YYTOKENTYPE
# define YYTOKENTYPE
enum yytokentype
{
NUMCONST = 258,
STRINGCONST = 259,
IDENT = 260,
CHARCONST = 261,
BOOLCONST = 262,
BEGIN = 263,
END = 264,
IF = 265,
THEN = 266,
ELSE = 267,
WHILE = 268,
DO = 269,
FOR = 270,
TO = 271,
BY = 272,
RETURN = 273,
BREAK = 274,
OR = 275,
AND = 276,
NOT = 277,
STATIC = 278,
BOOL = 279,
CHAR = 280,
INT = 281,
DPLUS = 282,
DMINUS = 283,
LASSIGN = 284,
PLUSEQ = 285,
MINUSEQ = 286,
TIMEEQ = 287,
DIVEQ = 288,
NOTEQ = 289
};
#endif
这是我用来构建的 makefile。我已经删除了所有生成的文件并重新运行,但问题似乎不在这里。
cc = gcc
ccopts = #-ly
lex = flex
lexopts =
lexgens = lex.yy.c
yacc = bison
yaccopts = -d
yaccgens = cScan.tab.c cScan.tab.h
prj = cScan
$(prj): $(lexgens) $(yaccgens)
$(cc) $(lexgens) $(yaccgens) $(ccopts) -o $(prj)
clean:
rm $(lexgens) $(yaccgens) $(prj)
$(yaccgens): $(prj).y
$(yacc) $(yaccopts) $(prj).y
$(lexgens): $(prj).l $(yaccgens)
$(lex) $(lexopts) $(prj).l
为了完整起见,这是整个 bison 文件。
%{
#include "scanType.h"
#include "treeType.h"
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
void yyerror(char*);
int yylex(void);
extern FILE *yyin;
%}
%define parse.error verbose
%union {
struct TokenData *token;//for terminals, from yylex
struct TreeNode *tree;//for nonterminals, to build the tree
char op;
}
%token <token> NUMCONST STRINGCONST IDENT CHARCONST BOOLCONST
%token <token> BEGIN END IF THEN ELSE WHILE DO FOR TO BY RETURN BREAK OR AND NOT STATIC BOOL CHAR INT
%token <token> DPLUS DMINUS LASSIGN PLUSEQ MINUSEQ TIMEEQ DIVEQ NOTEQ
%%
program :
declList
;
declList
: declList decl
| decl
;
decl
: varDecl
| funDecl
;
varDecl
: typeSpec varDeclList ';'
;
scopedVarDecl
: STATIC typeSpec varDeclList ';'
| typeSpec varDeclList ';'
;
varDeclList
: varDeclList ',' varDeclInit
| varDeclInit
;
varDeclInit
: varDeclId
| varDeclId ':' simpleExp
;
varDeclId
: IDENT
| IDENT '[' NUMCONST ']'
;
typeSpec
: BOOL
| CHAR
| INT
;
funDecl
: typeSpec IDENT '(' parms ')' compoundStmt
| IDENT '(' parms ')' compoundStmt
;
parms
: parmList
| {/*Epsilon*/}
;
parmList
: parmList ';' parmTypeList
| parmTypeList
;
parmTypeList
: typeSpec parmIdList
;
parmIdList
: parmIdList ',' parmId
| parmId
;
parmId
: IDENT
| IDENT '['']'
;
stmt
: matchStmt
| unmatchStmt
;
matchStmt
: selectStmt_M
| iterStmt_M
| otherStmt
;
unmatchStmt
: selectStmt_U
| iterStmt_U
;
selectStmt_M
: IF simpleExp THEN matchStmt ELSE matchStmt
;
selectStmt_U
: IF simpleExp THEN stmt
| IF simpleExp THEN matchStmt ELSE unmatchStmt
;
iterStmt_U
: WHILE simpleExp DO unmatchStmt
| FOR IDENT LASSIGN iterRange DO unmatchStmt
;
iterStmt_M
: WHILE simpleExp DO matchStmt
| FOR IDENT LASSIGN iterRange DO matchStmt
;
iterRange
: simpleExp TO simpleExp iterRangeStmtPr
;
iterRangeStmtPr
: BY simpleExp
| {/*Addition to stop ambiguity*/}
;
otherStmt
: expStmt
| returnStmt
| breakStmt
| compoundStmt
;
compoundStmt
: BEGIN localDecls stmtList END
;
localDecls
: localDecls scopedVarDecl
| {/*Epsilon*/}
;
stmtList
: stmtList stmt
| {/*Epsilon*/}
;
expStmt
: exp ';'
| ';'
;
returnStmt
: RETURN ';'
| RETURN exp ';'
;
breakStmt
: BREAK ';'
;
exp
: mutExp
| simpleExp
;
mutExp
: mutable assignop exp
| mutable DPLUS
| mutable DMINUS
;
assignop
: LASSIGN | PLUSEQ | MINUSEQ | TIMEEQ | DIVEQ
;
simpleExp
: simpleExp OR andExp
| andExp
;
andExp
: andExp AND unaryRelExp
| unaryRelExp
;
unaryRelExp
: NOT unaryRelExp
| relExp
;
relExp
: sumExp relop sumExp
| sumExp
;
relop
: '<' | '<' '=' | '>' | '>' '=' | '=' | NOTEQ
;
sumExp
: sumExp sumop mulExp
| mulExp
;
sumop
: '+' | '-'
;
mulExp
: mulExp mulop unaryExp
| unaryExp
;
mulop
: '*' | '/' | '%'
;
unaryExp
: unaryop unaryExp
| factor
;
unaryop
: '-' | '*' | '?'
;
factor
: mutable
| immutable
;
mutable
: IDENT
| IDENT '[' exp ']'
;
immutable
: '(' exp ')'
| call
| constant
;
call
: IDENT '(' args ')'
;
args
: argList
| {/*Epsilon*/}
;
argList
: argList ',' exp
| exp
;
constant
: NUMCONST | STRINGCONST | CHARCONST | BOOLCONST
;
%%
/*
 * Entry point: parses the file named by the first command-line argument,
 * or standard input when no argument is given.
 * Returns 1 if the named file cannot be opened, 0 otherwise.
 */
int main(int argc, char *argv[])
{
    FILE *fp = NULL;

    if (argc > 1) {
        fp = fopen(argv[1], "r");
        if (fp == NULL) {
            /* Without this check a NULL yyin would be handed to the scanner. */
            perror(argv[1]);
            return 1;
        }
        yyin = fp;
    } else {
        yyin = stdin;
    }

    yyparse();

    /* Only close what was opened here; stdin is left alone. */
    if (fp != NULL) {
        fclose(fp);
    }
    return 0;
}
/* Writes the parser's error message s to standard output, in double quotes. */
void yyerror(char* s)
{
    fprintf(stdout, "yyerror: \"%s\"\n", s);
}
编辑:
ScanType.h
#ifndef TOKNDATA_H
/* Include guard; the macro also expands to the build date and time string. */
#define TOKNDATA_H __DATE__" "__TIME__
/*
 * Data recorded for a single token.
 * NOTE(review): the trailing `* useToken` defines a pointer variable in
 * every file that includes this header -- confirm that this is intended.
 */
struct TokenData {
int tokenclass; // numeric token class
int linenum; // line where the token was found
char *tokenstr; // the string that was actually read
char cvalue; // any character value
int nvalue; // any numeric value or Boolean value
char *svalue; // any string value, e.g. an identifier
} * useToken;
#endif /*TOKNDATA_H*/
编辑 2:
调换这些标记声明在 bison 文件中的顺序后,旧标记也变成未声明了。
像这样改变顺序后
%token <token> BEGIN END IF THEN ELSE WHILE DO FOR TO BY RETURN BREAK OR AND NOT STATIC BOOL CHAR INT
%token <token> DPLUS DMINUS LASSIGN PLUSEQ MINUSEQ TIMEEQ DIVEQ NOTEQ
%token <token> NUMCONST STRINGCONST IDENT CHARCONST BOOLCONST
我收到以下错误日志。
cScan.l:44:10: error: ‘STATIC’ undeclared (first use in this function)
static { return STATIC; }
^
cScan.l:45:10: error: ‘BOOL’ undeclared (first use in this function)
bool { return BOOL; }
^
cScan.l:46:10: error: ‘CHAR’ undeclared (first use in this function)
char { return CHAR; }
^
cScan.l:47:10: error: ‘INT’ undeclared (first use in this function)
int { return INT; }
^
cScan.l:48:15: error: expected expression before ‘;’ token
begin { return BEGIN;}
^
cScan.l:49:10: error: ‘END’ undeclared (first use in this function)
end { return END;}
^
cScan.l:50:10: error: ‘IF’ undeclared (first use in this function)
if { return IF;}
^
cScan.l:51:10: error: ‘THEN’ undeclared (first use in this function)
then { return THEN;}
^
cScan.l:52:10: error: ‘ELSE’ undeclared (first use in this function)
else { return ELSE;}
^
cScan.l:53:10: error: ‘WHILE’ undeclared (first use in this function)
while { return WHILE;}
^
cScan.l:54:10: error: ‘DO’ undeclared (first use in this function)
do { return DO;}
^
cScan.l:55:10: error: ‘FOR’ undeclared (first use in this function)
for { return FOR;}
^
cScan.l:56:10: error: ‘TO’ undeclared (first use in this function)
to { return TO;}
^
cScan.l:57:10: error: ‘BY’ undeclared (first use in this function)
by { return BY;}
^
cScan.l:58:10: error: ‘RETURN’ undeclared (first use in this function)
return { return RETURN;}
^
cScan.l:59:10: error: ‘BREAK’ undeclared (first use in this function)
break { return BREAK;}
^
cScan.l:60:10: error: ‘OR’ undeclared (first use in this function)
or { return OR; }
^
cScan.l:61:10: error: ‘AND’ undeclared (first use in this function)
and { return AND; }
^
cScan.l:62:10: error: ‘NOT’ undeclared (first use in this function)
not { return NOT;}
^
cScan.l:64:10: error: ‘DPLUS’ undeclared (first use in this function)
"++" { return DPLUS; }
^
cScan.l:65:10: error: ‘DMINUS’ undeclared (first use in this function)
"--" { return DMINUS; }
^
cScan.l:66:10: error: ‘LASSIGN’ undeclared (first use in this function)
"<-" { return LASSIGN; }
^
cScan.l:67:10: error: ‘PLUSEQ’ undeclared (first use in this function)
"+=" { return PLUSEQ; }
^
cScan.l:68:10: error: ‘MINUSEQ’ undeclared (first use in this function)
"-=" { return MINUSEQ; }
^
cScan.l:69:10: error: ‘TIMEEQ’ undeclared (first use in this function)
"*=" { return TIMEEQ; }
^
cScan.l:70:10: error: ‘DIVEQ’ undeclared (first use in this function)
"/=" { return DIVEQ; }
^
cScan.l:71:10: error: ‘NOTEQ’ undeclared (first use in this function)
"!=" { return NOTEQ; }
^
cScan.l:80:12: error: ‘IDENT’ undeclared (first use in this function)
return IDENT;
^
cScan.l:90:12: error: ‘NUMCONST’ undeclared (first use in this function)
return NUMCONST;
^
cScan.l:101:12: error: ‘STRINGCONST’ undeclared (first use in this function)
return STRINGCONST;
^
cScan.l:112:12: error: ‘CHARCONST’ undeclared (first use in this function)
return CHARCONST;
撤销此更改后,旧标记又能恢复正常工作。
不能用 BEGIN
作为标记名,因为标记名会作为 C 中的值(枚举常量)使用,而 BEGIN
是 flex 定义的宏(用于切换起始状态)。
这会导致您在问题中引用的 enum
声明出现语法错误,结果是 BEGIN
之后的所有枚举成员都未声明。不过,最重要的错误消息是指出枚举声明本身存在语法错误的那一条:
lex.yy.c:117:15: error: expected identifier before ‘(’ token
#define BEGIN (yy_start) = 1 + 2 *
^
cScan.tab.h:62:5: note: in expansion of macro ‘BEGIN’
BEGIN = 263, /* BEGIN */
^~~~~
而出于某种原因,您在问题中省略了这条消息。
这同样适用于任何其他宏,包括您所使用的系统库头文件中的宏。我通常更喜欢给标记名加上类似 T_
的前缀,然后使用 bison 别名让语法看起来更漂亮:
%token T_BEGIN "begin"
T_END "end"
// ...
%%
// ...
compoundStmt
: "begin" localDecls stmtList "end"
顺便说一下,如果您真的去使用这些数据,您对 struct TokenData
的用法将导致未定义行为(其实这些数据并不是必需的:Bison 提供了许多调试机制,不需要您自己花这么多功夫)。
举个例子,考虑
{BOOLCONST} {
struct TokenData boolToken;
yylval.token = &boolToken;
yylval.token->tokenclass = 5;
yylval.token->linenum = yylineno;
yylval.token->tokenstr = yytext;
if(yytext[0] == 't') {
yylval.token->nvalue = 1;
} else {
yylval.token->nvalue = 0;
}
return BOOLCONST;
}
boolToken
是一个自动(“局部”)变量,因此它的生命周期在 return BOOLCONST
执行时结束。yylval
(yylval.token = &boolToken;
)中存储的地址是一个悬空指针,一旦 yylex
返回,yylval.token
所指向的内容就完全无法预测。此外,即使该内存区域的内容碰巧仍然完好无损,您存储的其他指针之一:
yylval.token->tokenstr = yytext;
是指向 Flex 内部输入缓冲区的指针,其内容会在下一次调用 yylex
时被修改(这几乎肯定发生在 BOOLCONST
的语义值能够被使用之前,因为 bison 生成的解析器通常会预读一个标记)。
我刚刚向我的解析器添加了一组新标记,但每个新标记都被报告为未声明。第一行的标记在上一个能正常工作的版本中就已经存在。
%token <token> NUMCONST STRINGCONST IDENT CHARCONST BOOLCONST
%token <token> BEGIN END IF THEN ELSE WHILE DO FOR TO BY RETURN BREAK OR AND NOT STATIC BOOL CHAR INT
%token <token> DPLUS DMINUS LASSIGN PLUSEQ MINUSEQ TIMEEQ DIVEQ NOTEQ
运行 makefile 后收到的错误消息表明,没有任何一个新标记被正确声明,尽管所有旧标记仍然可以正常使用。
cScan.l:44:9: error: ‘STATIC’ undeclared (first use in this function)
static {return STATIC;}
^
cScan.l:44:9: note: each undeclared identifier is reported only once for each function it appears in
cScan.l:45:9: error: ‘BOOL’ undeclared (first use in this function)
bool {return BOOL;}
^
cScan.l:46:9: error: ‘CHAR’ undeclared (first use in this function)
char {return CHAR;}
^
cScan.l:47:10: error: ‘INT’ undeclared (first use in this function)
int { return INT; }
^
cScan.l:48:15: error: expected expression before ‘;’ token
begin { return BEGIN;}
^
cScan.l:49:9: error: ‘END’ undeclared (first use in this function)
end {return END;}
^
cScan.l:50:9: error: ‘IF’ undeclared (first use in this function)
if {return IF;}
^
cScan.l:51:9: error: ‘THEN’ undeclared (first use in this function)
then {return THEN;}
^
cScan.l:52:9: error: ‘ELSE’ undeclared (first use in this function)
else {return ELSE;}
^
cScan.l:53:9: error: ‘WHILE’ undeclared (first use in this function)
while {return WHILE;}
^
cScan.l:54:9: error: ‘DO’ undeclared (first use in this function)
do {return DO;}
^
cScan.l:55:9: error: ‘FOR’ undeclared (first use in this function)
for {return FOR;}
^
cScan.l:56:9: error: ‘TO’ undeclared (first use in this function)
to {return TO;}
^
cScan.l:57:9: error: ‘BY’ undeclared (first use in this function)
by {return BY;}
^
cScan.l:58:9: error: ‘RETURN’ undeclared (first use in this function)
return {return RETURN;}
^
cScan.l:59:9: error: ‘BREAK’ undeclared (first use in this function)
break {return BREAK;}
^
cScan.l:60:9: error: ‘OR’ undeclared (first use in this function)
or {return OR;}
^
cScan.l:61:9: error: ‘AND’ undeclared (first use in this function)
and {return AND;}
^
cScan.l:62:10: error: ‘NOT’ undeclared (first use in this function)
not { return NOT;}
^
cScan.l:64:10: error: ‘DPLUS’ undeclared (first use in this function)
"++" { return DPLUS; }
^
cScan.l:65:10: error: ‘DMINUS’ undeclared (first use in this function)
"--" { return DMINUS; }
^
cScan.l:66:10: error: ‘LASSIGN’ undeclared (first use in this function)
"<-" { return LASSIGN; }
^
cScan.l:67:10: error: ‘PLUSEQ’ undeclared (first use in this function)
"+=" { return PLUSEQ; }
^
cScan.l:68:10: error: ‘MINUSEQ’ undeclared (first use in this function)
"-=" { return MINUSEQ; }
^
cScan.l:69:10: error: ‘TIMEEQ’ undeclared (first use in this function)
"*=" { return TIMEEQ; }
^
cScan.l:70:10: error: ‘DIVEQ’ undeclared (first use in this function)
"/=" { return DIVEQ; }
^
cScan.l:71:10: error: ‘NOTEQ’ undeclared (first use in this function)
"!=" { return NOTEQ; }
这是返回各个标记的 flex 文件:
%{
/*
* cScan.l
*/
#include "scanType.h"
#include "cScan.tab.h"
%}
%option yylineno
LETTER [A-Za-z]
ID {LETTER}[_A-Za-z0-9]*
NUMCONST [0-9]+
STRINGCONST \"([^\\"]|\.)*\"
CHARCONST '\?.'
BOOLCONST true|false
%%
{BOOLCONST} {
struct TokenData boolToken;
yylval.token = &boolToken;
yylval.token->tokenclass = 5;
yylval.token->linenum = yylineno;
yylval.token->tokenstr = yytext;
if(yytext[0] == 't') {
yylval.token->nvalue = 1;
} else {
yylval.token->nvalue = 0;
}
return BOOLCONST;
}
static { return STATIC; }
bool { return BOOL; }
char { return CHAR; }
int { return INT; }
begin { return BEGIN;}
end { return END;}
if { return IF;}
then { return THEN;}
else { return ELSE;}
while { return WHILE;}
do { return DO;}
for { return FOR;}
to { return TO;}
by { return BY;}
return { return RETURN;}
break { return BREAK;}
or { return OR; }
and { return AND; }
not { return NOT;}
"++" { return DPLUS; }
"--" { return DMINUS; }
"<-" { return LASSIGN; }
"+=" { return PLUSEQ; }
"-=" { return MINUSEQ; }
"*=" { return TIMEEQ; }
"/=" { return DIVEQ; }
"!=" { return NOTEQ; }
{ID} {
struct TokenData idToken;
yylval.token = &idToken;
yylval.token->tokenclass = 1;
yylval.token->linenum = yylineno;
yylval.token->tokenstr = yytext;
yylval.token->svalue = yytext;
return IDENT;
}
{NUMCONST} {
struct TokenData numToken;
yylval.token = &numToken;
yylval.token->tokenclass = 2;
yylval.token->linenum = yylineno;
yylval.token->nvalue = atoi(yytext);
yylval.token->tokenstr = yytext;
return NUMCONST;
}
{STRINGCONST} {
struct TokenData stringToken;
yylval.token = &stringToken;
yylval.token->tokenclass = 3;
yylval.token->linenum = yylineno;
yylval.token->tokenstr = yytext;
yylval.token->svalue = yytext;
yylval.token->nvalue = yyleng-2;
return STRINGCONST;
}
{CHARCONST} {
struct TokenData charToken;
yylval.token = &charToken;
yylval.token->tokenclass = 4;
yylval.token->linenum = yylineno;
yylval.token->tokenstr = yytext;
yylval.token->svalue = yytext;
return CHARCONST;
}
"="|"<"|">"|"+"|"-"|"*"|"/"|"%"|"["|"]"|"*"|"-"|"?"|"("|")"|";"|","|":" { return yytext[0]; }
[ \t\r] ;
##.*\n ;
\n { ; /*option to add stuff*/ }
. { printf("ERROR(%d): Invalid or misplaced input character: '%c'. Character Ignored.\n", yylineno, yytext[0]); }
%%
/*
 * Called by the scanner when the end of an input file is encountered.
 * Returning 1 reports that there is no further input to continue with,
 * so scanning stops.
 */
int yywrap(void) {
    return 1;
}
这些标记全部列在 cScan.tab.h 文件中,而该文件已被 cScan.l 包含。以下是它们的定义。
/* Token type. */
#ifndef YYTOKENTYPE
# define YYTOKENTYPE
enum yytokentype
{
NUMCONST = 258,
STRINGCONST = 259,
IDENT = 260,
CHARCONST = 261,
BOOLCONST = 262,
BEGIN = 263,
END = 264,
IF = 265,
THEN = 266,
ELSE = 267,
WHILE = 268,
DO = 269,
FOR = 270,
TO = 271,
BY = 272,
RETURN = 273,
BREAK = 274,
OR = 275,
AND = 276,
NOT = 277,
STATIC = 278,
BOOL = 279,
CHAR = 280,
INT = 281,
DPLUS = 282,
DMINUS = 283,
LASSIGN = 284,
PLUSEQ = 285,
MINUSEQ = 286,
TIMEEQ = 287,
DIVEQ = 288,
NOTEQ = 289
};
#endif
这是我用来构建的 makefile。我已经删除了所有生成的文件并重新运行,但问题似乎不在这里。
cc = gcc
ccopts = #-ly
lex = flex
lexopts =
lexgens = lex.yy.c
yacc = bison
yaccopts = -d
yaccgens = cScan.tab.c cScan.tab.h
prj = cScan
$(prj): $(lexgens) $(yaccgens)
$(cc) $(lexgens) $(yaccgens) $(ccopts) -o $(prj)
clean:
rm $(lexgens) $(yaccgens) $(prj)
$(yaccgens): $(prj).y
$(yacc) $(yaccopts) $(prj).y
$(lexgens): $(prj).l $(yaccgens)
$(lex) $(lexopts) $(prj).l
为了完整起见,这是整个 bison 文件。
%{
#include "scanType.h"
#include "treeType.h"
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
void yyerror(char*);
int yylex(void);
extern FILE *yyin;
%}
%define parse.error verbose
%union {
struct TokenData *token;//for terminals, from yylex
struct TreeNode *tree;//for nonterminals, to build the tree
char op;
}
%token <token> NUMCONST STRINGCONST IDENT CHARCONST BOOLCONST
%token <token> BEGIN END IF THEN ELSE WHILE DO FOR TO BY RETURN BREAK OR AND NOT STATIC BOOL CHAR INT
%token <token> DPLUS DMINUS LASSIGN PLUSEQ MINUSEQ TIMEEQ DIVEQ NOTEQ
%%
program :
declList
;
declList
: declList decl
| decl
;
decl
: varDecl
| funDecl
;
varDecl
: typeSpec varDeclList ';'
;
scopedVarDecl
: STATIC typeSpec varDeclList ';'
| typeSpec varDeclList ';'
;
varDeclList
: varDeclList ',' varDeclInit
| varDeclInit
;
varDeclInit
: varDeclId
| varDeclId ':' simpleExp
;
varDeclId
: IDENT
| IDENT '[' NUMCONST ']'
;
typeSpec
: BOOL
| CHAR
| INT
;
funDecl
: typeSpec IDENT '(' parms ')' compoundStmt
| IDENT '(' parms ')' compoundStmt
;
parms
: parmList
| {/*Epsilon*/}
;
parmList
: parmList ';' parmTypeList
| parmTypeList
;
parmTypeList
: typeSpec parmIdList
;
parmIdList
: parmIdList ',' parmId
| parmId
;
parmId
: IDENT
| IDENT '['']'
;
stmt
: matchStmt
| unmatchStmt
;
matchStmt
: selectStmt_M
| iterStmt_M
| otherStmt
;
unmatchStmt
: selectStmt_U
| iterStmt_U
;
selectStmt_M
: IF simpleExp THEN matchStmt ELSE matchStmt
;
selectStmt_U
: IF simpleExp THEN stmt
| IF simpleExp THEN matchStmt ELSE unmatchStmt
;
iterStmt_U
: WHILE simpleExp DO unmatchStmt
| FOR IDENT LASSIGN iterRange DO unmatchStmt
;
iterStmt_M
: WHILE simpleExp DO matchStmt
| FOR IDENT LASSIGN iterRange DO matchStmt
;
iterRange
: simpleExp TO simpleExp iterRangeStmtPr
;
iterRangeStmtPr
: BY simpleExp
| {/*Addition to stop ambiguity*/}
;
otherStmt
: expStmt
| returnStmt
| breakStmt
| compoundStmt
;
compoundStmt
: BEGIN localDecls stmtList END
;
localDecls
: localDecls scopedVarDecl
| {/*Epsilon*/}
;
stmtList
: stmtList stmt
| {/*Epsilon*/}
;
expStmt
: exp ';'
| ';'
;
returnStmt
: RETURN ';'
| RETURN exp ';'
;
breakStmt
: BREAK ';'
;
exp
: mutExp
| simpleExp
;
mutExp
: mutable assignop exp
| mutable DPLUS
| mutable DMINUS
;
assignop
: LASSIGN | PLUSEQ | MINUSEQ | TIMEEQ | DIVEQ
;
simpleExp
: simpleExp OR andExp
| andExp
;
andExp
: andExp AND unaryRelExp
| unaryRelExp
;
unaryRelExp
: NOT unaryRelExp
| relExp
;
relExp
: sumExp relop sumExp
| sumExp
;
relop
: '<' | '<' '=' | '>' | '>' '=' | '=' | NOTEQ
;
sumExp
: sumExp sumop mulExp
| mulExp
;
sumop
: '+' | '-'
;
mulExp
: mulExp mulop unaryExp
| unaryExp
;
mulop
: '*' | '/' | '%'
;
unaryExp
: unaryop unaryExp
| factor
;
unaryop
: '-' | '*' | '?'
;
factor
: mutable
| immutable
;
mutable
: IDENT
| IDENT '[' exp ']'
;
immutable
: '(' exp ')'
| call
| constant
;
call
: IDENT '(' args ')'
;
args
: argList
| {/*Epsilon*/}
;
argList
: argList ',' exp
| exp
;
constant
: NUMCONST | STRINGCONST | CHARCONST | BOOLCONST
;
%%
/*
 * Entry point: parses the file named by the first command-line argument,
 * or standard input when no argument is given.
 * Returns 1 if the named file cannot be opened, 0 otherwise.
 */
int main(int argc, char *argv[])
{
    FILE *fp = NULL;

    if (argc > 1) {
        fp = fopen(argv[1], "r");
        if (fp == NULL) {
            /* Without this check a NULL yyin would be handed to the scanner. */
            perror(argv[1]);
            return 1;
        }
        yyin = fp;
    } else {
        yyin = stdin;
    }

    yyparse();

    /* Only close what was opened here; stdin is left alone. */
    if (fp != NULL) {
        fclose(fp);
    }
    return 0;
}
/* Writes the parser's error message s to standard output, in double quotes. */
void yyerror(char* s)
{
    fprintf(stdout, "yyerror: \"%s\"\n", s);
}
编辑: ScanType.h
#ifndef TOKNDATA_H
/* Include guard; the macro also expands to the build date and time string. */
#define TOKNDATA_H __DATE__" "__TIME__
/*
 * Data recorded for a single token.
 * NOTE(review): the trailing `* useToken` defines a pointer variable in
 * every file that includes this header -- confirm that this is intended.
 */
struct TokenData {
int tokenclass; // numeric token class
int linenum; // line where the token was found
char *tokenstr; // the string that was actually read
char cvalue; // any character value
int nvalue; // any numeric value or Boolean value
char *svalue; // any string value, e.g. an identifier
} * useToken;
#endif /*TOKNDATA_H*/
编辑 2:
调换这些标记声明在 bison 文件中的顺序后,旧标记也变成未声明了。
像这样改变顺序后
%token <token> BEGIN END IF THEN ELSE WHILE DO FOR TO BY RETURN BREAK OR AND NOT STATIC BOOL CHAR INT
%token <token> DPLUS DMINUS LASSIGN PLUSEQ MINUSEQ TIMEEQ DIVEQ NOTEQ
%token <token> NUMCONST STRINGCONST IDENT CHARCONST BOOLCONST
我收到以下错误日志。
cScan.l:44:10: error: ‘STATIC’ undeclared (first use in this function)
static { return STATIC; }
^
cScan.l:45:10: error: ‘BOOL’ undeclared (first use in this function)
bool { return BOOL; }
^
cScan.l:46:10: error: ‘CHAR’ undeclared (first use in this function)
char { return CHAR; }
^
cScan.l:47:10: error: ‘INT’ undeclared (first use in this function)
int { return INT; }
^
cScan.l:48:15: error: expected expression before ‘;’ token
begin { return BEGIN;}
^
cScan.l:49:10: error: ‘END’ undeclared (first use in this function)
end { return END;}
^
cScan.l:50:10: error: ‘IF’ undeclared (first use in this function)
if { return IF;}
^
cScan.l:51:10: error: ‘THEN’ undeclared (first use in this function)
then { return THEN;}
^
cScan.l:52:10: error: ‘ELSE’ undeclared (first use in this function)
else { return ELSE;}
^
cScan.l:53:10: error: ‘WHILE’ undeclared (first use in this function)
while { return WHILE;}
^
cScan.l:54:10: error: ‘DO’ undeclared (first use in this function)
do { return DO;}
^
cScan.l:55:10: error: ‘FOR’ undeclared (first use in this function)
for { return FOR;}
^
cScan.l:56:10: error: ‘TO’ undeclared (first use in this function)
to { return TO;}
^
cScan.l:57:10: error: ‘BY’ undeclared (first use in this function)
by { return BY;}
^
cScan.l:58:10: error: ‘RETURN’ undeclared (first use in this function)
return { return RETURN;}
^
cScan.l:59:10: error: ‘BREAK’ undeclared (first use in this function)
break { return BREAK;}
^
cScan.l:60:10: error: ‘OR’ undeclared (first use in this function)
or { return OR; }
^
cScan.l:61:10: error: ‘AND’ undeclared (first use in this function)
and { return AND; }
^
cScan.l:62:10: error: ‘NOT’ undeclared (first use in this function)
not { return NOT;}
^
cScan.l:64:10: error: ‘DPLUS’ undeclared (first use in this function)
"++" { return DPLUS; }
^
cScan.l:65:10: error: ‘DMINUS’ undeclared (first use in this function)
"--" { return DMINUS; }
^
cScan.l:66:10: error: ‘LASSIGN’ undeclared (first use in this function)
"<-" { return LASSIGN; }
^
cScan.l:67:10: error: ‘PLUSEQ’ undeclared (first use in this function)
"+=" { return PLUSEQ; }
^
cScan.l:68:10: error: ‘MINUSEQ’ undeclared (first use in this function)
"-=" { return MINUSEQ; }
^
cScan.l:69:10: error: ‘TIMEEQ’ undeclared (first use in this function)
"*=" { return TIMEEQ; }
^
cScan.l:70:10: error: ‘DIVEQ’ undeclared (first use in this function)
"/=" { return DIVEQ; }
^
cScan.l:71:10: error: ‘NOTEQ’ undeclared (first use in this function)
"!=" { return NOTEQ; }
^
cScan.l:80:12: error: ‘IDENT’ undeclared (first use in this function)
return IDENT;
^
cScan.l:90:12: error: ‘NUMCONST’ undeclared (first use in this function)
return NUMCONST;
^
cScan.l:101:12: error: ‘STRINGCONST’ undeclared (first use in this function)
return STRINGCONST;
^
cScan.l:112:12: error: ‘CHARCONST’ undeclared (first use in this function)
return CHARCONST;
撤销此更改后,旧标记又能恢复正常工作。
不能用 BEGIN
作为标记名,因为标记名会作为 C 中的值(枚举常量)使用,而 BEGIN
是 flex 定义的宏(用于切换起始状态)。
这会导致您在问题中引用的 enum
声明出现语法错误,结果是 BEGIN
之后的所有枚举成员都未声明。不过,最重要的错误消息是指出枚举声明本身存在语法错误的那一条:
lex.yy.c:117:15: error: expected identifier before ‘(’ token
#define BEGIN (yy_start) = 1 + 2 *
^
cScan.tab.h:62:5: note: in expansion of macro ‘BEGIN’
BEGIN = 263, /* BEGIN */
^~~~~
而出于某种原因,您在问题中省略了这条消息。
这同样适用于任何其他宏,包括您所使用的系统库头文件中的宏。我通常更喜欢给标记名加上类似 T_
的前缀,然后使用 bison 别名让语法看起来更漂亮:
%token T_BEGIN "begin"
T_END "end"
// ...
%%
// ...
compoundStmt
: "begin" localDecls stmtList "end"
顺便说一下,如果您真的去使用这些数据,您对 struct TokenData
的用法将导致未定义行为(其实这些数据并不是必需的:Bison 提供了许多调试机制,不需要您自己花这么多功夫)。
举个例子,考虑
{BOOLCONST} {
struct TokenData boolToken;
yylval.token = &boolToken;
yylval.token->tokenclass = 5;
yylval.token->linenum = yylineno;
yylval.token->tokenstr = yytext;
if(yytext[0] == 't') {
yylval.token->nvalue = 1;
} else {
yylval.token->nvalue = 0;
}
return BOOLCONST;
}
boolToken
是一个自动(“局部”)变量,因此它的生命周期在 return BOOLCONST
执行时结束。yylval
(yylval.token = &boolToken;
)中存储的地址是一个悬空指针,一旦 yylex
返回,yylval.token
所指向的内容就完全无法预测。此外,即使该内存区域的内容碰巧仍然完好无损,您存储的其他指针之一:
yylval.token->tokenstr = yytext;
是指向 Flex 内部输入缓冲区的指针,其内容会在下一次调用 yylex
时被修改(这几乎肯定发生在 BOOLCONST
的语义值能够被使用之前,因为 bison 生成的解析器通常会预读一个标记)。