为什么我不能在 flex/bison 中声明新令牌?

Why can't I declare new tokens in flex/bison?

我刚刚向我的解析器添加了一组新标记,但每个新标记都被报告为未声明。第一行标记包含在最后一个工作版本中。

%token <token> NUMCONST STRINGCONST IDENT CHARCONST BOOLCONST
%token <token> BEGIN END IF THEN ELSE WHILE DO FOR TO BY RETURN BREAK OR AND NOT STATIC BOOL CHAR INT 
%token <token> DPLUS DMINUS LASSIGN PLUSEQ MINUSEQ TIMEEQ DIVEQ NOTEQ

运行 makefile 后收到的错误消息表明,没有一个新标记被正确声明,尽管所有旧标记仍然可以正常工作。

cScan.l:44:9: error: ‘STATIC’ undeclared (first use in this function)
 static  {return STATIC;}
         ^
cScan.l:44:9: note: each undeclared identifier is reported only once for each function it appears in
cScan.l:45:9: error: ‘BOOL’ undeclared (first use in this function)
 bool    {return BOOL;}
         ^
cScan.l:46:9: error: ‘CHAR’ undeclared (first use in this function)
 char    {return CHAR;}
         ^
cScan.l:47:10: error: ‘INT’ undeclared (first use in this function)
 int     { return INT; }
          ^
cScan.l:48:15: error: expected expression before ‘;’ token
 begin    { return BEGIN;}
               ^
cScan.l:49:9: error: ‘END’ undeclared (first use in this function)
 end    {return END;}
         ^
cScan.l:50:9: error: ‘IF’ undeclared (first use in this function)
 if    {return IF;}
         ^
cScan.l:51:9: error: ‘THEN’ undeclared (first use in this function)
 then    {return THEN;}
         ^
cScan.l:52:9: error: ‘ELSE’ undeclared (first use in this function)
 else    {return ELSE;}
         ^
cScan.l:53:9: error: ‘WHILE’ undeclared (first use in this function)
 while    {return WHILE;}
         ^
cScan.l:54:9: error: ‘DO’ undeclared (first use in this function)
 do    {return DO;}
         ^
cScan.l:55:9: error: ‘FOR’ undeclared (first use in this function)
 for    {return FOR;}
         ^
cScan.l:56:9: error: ‘TO’ undeclared (first use in this function)
 to    {return TO;}
         ^
cScan.l:57:9: error: ‘BY’ undeclared (first use in this function)
 by    {return BY;}
         ^
cScan.l:58:9: error: ‘RETURN’ undeclared (first use in this function)
 return    {return RETURN;}
         ^
cScan.l:59:9: error: ‘BREAK’ undeclared (first use in this function)
 break    {return BREAK;}
         ^
cScan.l:60:9: error: ‘OR’ undeclared (first use in this function)
 or    {return OR;}
         ^
cScan.l:61:9: error: ‘AND’ undeclared (first use in this function)
 and    {return AND;}
         ^
cScan.l:62:10: error: ‘NOT’ undeclared (first use in this function)
 not { return NOT;}
          ^
cScan.l:64:10: error: ‘DPLUS’ undeclared (first use in this function)
 "++" { return DPLUS; }
          ^
cScan.l:65:10: error: ‘DMINUS’ undeclared (first use in this function)
 "--" { return DMINUS; }
          ^
cScan.l:66:10: error: ‘LASSIGN’ undeclared (first use in this function)
 "<-" { return LASSIGN; }
          ^
cScan.l:67:10: error: ‘PLUSEQ’ undeclared (first use in this function)
 "+=" { return PLUSEQ; }
          ^
cScan.l:68:10: error: ‘MINUSEQ’ undeclared (first use in this function)
 "-=" { return MINUSEQ; }
          ^
cScan.l:69:10: error: ‘TIMEEQ’ undeclared (first use in this function)
 "*=" { return TIMEEQ; }
          ^
cScan.l:70:10: error: ‘DIVEQ’ undeclared (first use in this function)
 "/=" { return DIVEQ; }
          ^
cScan.l:71:10: error: ‘NOTEQ’ undeclared (first use in this function)
 "!=" { return NOTEQ; }

这是我返回每个标记的 flex 文件:

%{
/*
 * cScan.l
 */
 #include <stdlib.h>
 #include <string.h>
 #include "scanType.h"
 #include "cScan.tab.h"

%}

%option yylineno

/* Named patterns used by the rules below. */
LETTER   [A-Za-z]
ID       {LETTER}[_A-Za-z0-9]*
NUMCONST [0-9]+
/* A string: ordinary characters or backslash escapes (\\. = backslash + any char) between double quotes. */
STRINGCONST \"([^\\"]|\\.)*\"
/* A character constant: one character, optionally preceded by a backslash (\\? = optional backslash). */
CHARCONST '\\?.'
BOOLCONST true|false

%%

{BOOLCONST} {
    /*
     * The parser reads yylval only after yylex() has returned, so the token
     * record must outlive this action: allocate it on the heap instead of
     * pointing yylval at a local. yytext is overwritten by the next match,
     * so the lexeme is copied as well. Ownership passes to the parser, which
     * should free tokenstr and the record once the value is no longer needed.
     */
    struct TokenData *boolToken = calloc(1, sizeof *boolToken);
    char *lexeme = strdup(yytext);
    if (boolToken == NULL || lexeme == NULL) {
        fprintf(stderr, "out of memory while creating BOOLCONST token\n");
        exit(EXIT_FAILURE);
    }
    boolToken->tokenclass = 5;
    boolToken->linenum = yylineno;
    boolToken->tokenstr = lexeme;
    /* The pattern only matches "true" or "false", so the first letter decides. */
    boolToken->nvalue = (yytext[0] == 't') ? 1 : 0;
    yylval.token = boolToken;
    return BOOLCONST;
}

static  { return STATIC; }
bool    { return BOOL; }
char    { return CHAR; }
int     { return INT; }
begin    { return BEGIN;}
end    { return END;}
if    { return IF;}
then    { return THEN;}
else    { return ELSE;}
while    { return WHILE;}
do    { return DO;}
for    { return FOR;}
to    { return TO;}
by    { return BY;}
return    { return RETURN;}
break    { return BREAK;}
or    { return OR; }
and    { return AND; }
not { return NOT;}

"++" { return DPLUS; }
"--" { return DMINUS; }
"<-" { return LASSIGN; }
"+=" { return PLUSEQ; }
"-=" { return MINUSEQ; }
"*=" { return TIMEEQ; }
"/=" { return DIVEQ; }
"!=" { return NOTEQ; }

{ID}        {
    /*
     * The parser reads yylval only after yylex() has returned, so the token
     * record is heap-allocated rather than a local of this action, and the
     * lexeme is copied because yytext is overwritten by the next match.
     * Ownership passes to the parser; tokenstr and svalue share ONE copy,
     * so it must be freed only once.
     */
    struct TokenData *idToken = calloc(1, sizeof *idToken);
    char *lexeme = strdup(yytext);
    if (idToken == NULL || lexeme == NULL) {
        fprintf(stderr, "out of memory while creating IDENT token\n");
        exit(EXIT_FAILURE);
    }
    idToken->tokenclass = 1;
    idToken->linenum = yylineno;
    idToken->tokenstr = lexeme;
    idToken->svalue = lexeme;
    yylval.token = idToken;
    return IDENT;
}

{NUMCONST} {
    /*
     * The parser reads yylval only after yylex() has returned, so the token
     * record is heap-allocated rather than a local of this action, and the
     * lexeme is copied because yytext is overwritten by the next match.
     * Ownership passes to the parser, which should free tokenstr and the
     * record once the value is no longer needed.
     */
    struct TokenData *numToken = calloc(1, sizeof *numToken);
    char *lexeme = strdup(yytext);
    if (numToken == NULL || lexeme == NULL) {
        fprintf(stderr, "out of memory while creating NUMCONST token\n");
        exit(EXIT_FAILURE);
    }
    numToken->tokenclass = 2;
    numToken->linenum = yylineno;
    numToken->nvalue = atoi(yytext);   /* the pattern guarantees decimal digits only */
    numToken->tokenstr = lexeme;
    yylval.token = numToken;
    return NUMCONST;
}

{STRINGCONST}   {
    /*
     * The parser reads yylval only after yylex() has returned, so the token
     * record is heap-allocated rather than a local of this action, and the
     * lexeme is copied because yytext is overwritten by the next match.
     * Ownership passes to the parser; tokenstr and svalue share ONE copy,
     * so it must be freed only once.
     */
    struct TokenData *stringToken = calloc(1, sizeof *stringToken);
    char *lexeme = strdup(yytext);
    if (stringToken == NULL || lexeme == NULL) {
        fprintf(stderr, "out of memory while creating STRINGCONST token\n");
        exit(EXIT_FAILURE);
    }
    stringToken->tokenclass = 3;
    stringToken->linenum = yylineno;
    stringToken->tokenstr = lexeme;
    stringToken->svalue = lexeme;
    stringToken->nvalue = yyleng-2;    /* matched length without the two quotes */
    yylval.token = stringToken;
    return STRINGCONST;
}

{CHARCONST}   {
    /*
     * The parser reads yylval only after yylex() has returned, so the token
     * record is heap-allocated rather than a local of this action, and the
     * lexeme is copied because yytext is overwritten by the next match.
     * Ownership passes to the parser; tokenstr and svalue share ONE copy,
     * so it must be freed only once.
     */
    struct TokenData *charToken = calloc(1, sizeof *charToken);
    char *lexeme = strdup(yytext);
    if (charToken == NULL || lexeme == NULL) {
        fprintf(stderr, "out of memory while creating CHARCONST token\n");
        exit(EXIT_FAILURE);
    }
    charToken->tokenclass = 4;
    charToken->linenum = yylineno;
    charToken->tokenstr = lexeme;
    charToken->svalue = lexeme;
    yylval.token = charToken;
    return CHARCONST;
}

"="|"<"|">"|"+"|"-"|"*"|"/"|"%"|"["|"]"|"*"|"-"|"?"|"("|")"|";"|","|":" { return yytext[0]; }



[ \t\r]         ;

##.*\n          ;

\n              { ; /*option to add stuff*/ }

.               { printf("ERROR(%d): Invalid or misplaced input character: '%c'. Character Ignored.\n", yylineno, yytext[0]); }
%%

/*
 * Called by the scanner when it reaches the end of the current input.
 * Returning 1 (non-zero) tells it that there is no further input to switch
 * to, so yylex() reports end-of-input to its caller. The value is not a
 * process exit status.
 */
int yywrap(void) {
    return 1;
}

令牌全部列在cScan.tab.h文件中,该文件包含在cScan.l中。这是他们的定义。

/* Token type.  */
#ifndef YYTOKENTYPE
# define YYTOKENTYPE
  enum yytokentype
  {
    NUMCONST = 258,
    STRINGCONST = 259,
    IDENT = 260,
    CHARCONST = 261,
    BOOLCONST = 262,
    BEGIN = 263,
    END = 264,
    IF = 265,
    THEN = 266,
    ELSE = 267,
    WHILE = 268,
    DO = 269,
    FOR = 270,
    TO = 271,
    BY = 272,
    RETURN = 273,
    BREAK = 274,
    OR = 275,
    AND = 276,
    NOT = 277,
    STATIC = 278,
    BOOL = 279,
    CHAR = 280,
    INT = 281,
    DPLUS = 282,
    DMINUS = 283,
    LASSIGN = 284,
    PLUSEQ = 285,
    MINUSEQ = 286,
    TIMEEQ = 287,
    DIVEQ = 288,
    NOTEQ = 289
  };
#endif

这是我用来构建的 makefile。我已经删除了所有生成的文件并重新运行,但这似乎不是问题所在。

# Builds the cScan parser: bison generates cScan.tab.c/.h from cScan.y,
# flex generates lex.yy.c from cScan.l, and gcc compiles and links both.
# NOTE: every recipe line below must begin with a TAB character, not spaces.
cc = gcc
ccopts = #-ly
lex = flex
lexopts =
lexgens = lex.yy.c
yacc = bison
yaccopts = -d
yaccgens = cScan.tab.c cScan.tab.h
prj = cScan

# clean names an action, not a file, so it must run even if a file called
# "clean" exists.
.PHONY: clean

# The generated header stays a prerequisite (the program is rebuilt when it
# changes) but only the .c files are handed to the compiler.
$(prj): $(lexgens) $(yaccgens)
	$(cc) $(filter %.c,$(lexgens) $(yaccgens)) $(ccopts) -o $(prj)

# -f: do not fail when some of the generated files are already gone.
clean:
	rm -f $(lexgens) $(yaccgens) $(prj)

# bison -d writes both the parser source and the token header.
$(yaccgens): $(prj).y
	$(yacc) $(yaccopts) $(prj).y

# The scanner includes cScan.tab.h, so it depends on the bison outputs.
$(lexgens): $(prj).l $(yaccgens)
	$(lex) $(lexopts) $(prj).l

为了完整起见,这是整个 bison 文件。

%{
#include "scanType.h"
#include "treeType.h"

#include <string.h>
#include <stdio.h>
#include <stdlib.h>

void yyerror(char*);
int yylex(void);
extern FILE *yyin;

%}

%define parse.error verbose

%union {
    struct TokenData *token;//for terminals, from yylex
    struct TreeNode *tree;//for nonterminals, to build the tree
    char op;
}


%token <token> NUMCONST STRINGCONST IDENT CHARCONST BOOLCONST
%token <token> BEGIN END IF THEN ELSE WHILE DO FOR TO BY RETURN BREAK OR AND NOT STATIC BOOL CHAR INT 
%token <token> DPLUS DMINUS LASSIGN PLUSEQ MINUSEQ TIMEEQ DIVEQ NOTEQ



%%

program : 
    declList
    ;

declList
    : declList decl
    | decl 
    ;

decl
    : varDecl
    | funDecl 
    ;

varDecl
    : typeSpec varDeclList ';' 
    ;

scopedVarDecl
    : STATIC typeSpec varDeclList ';'
    | typeSpec varDeclList ';' 
    ;

varDeclList
    : varDeclList ',' varDeclInit
    | varDeclInit 
    ;

varDeclInit
    : varDeclId
    | varDeclId ':' simpleExp 
    ;

varDeclId
    : IDENT
    | IDENT '[' NUMCONST ']' 
    ;

typeSpec
    : BOOL
    | CHAR
    | INT 
    ;

funDecl
    : typeSpec IDENT '(' parms ')' compoundStmt
    | IDENT '(' parms ')' compoundStmt 
    ;

parms
    : parmList
    | {/*Epsilon*/} 
    ;

parmList
    : parmList ';' parmTypeList
    | parmTypeList
    ;

parmTypeList
    : typeSpec parmIdList
    ;

parmIdList
    : parmIdList ',' parmId 
    | parmId
    ;

parmId
    : IDENT
    | IDENT '['']'
    ;

stmt
    : matchStmt
    | unmatchStmt
    ;

matchStmt
    : selectStmt_M
    | iterStmt_M
    | otherStmt
    ;

unmatchStmt
    : selectStmt_U
    | iterStmt_U
    ;

selectStmt_M
    : IF simpleExp THEN matchStmt ELSE matchStmt
    ;

selectStmt_U
    : IF simpleExp THEN stmt
    | IF simpleExp THEN matchStmt ELSE unmatchStmt
    ;

iterStmt_U
    : WHILE simpleExp DO unmatchStmt
    | FOR IDENT LASSIGN iterRange DO unmatchStmt
    ;

iterStmt_M
    : WHILE simpleExp DO matchStmt
    | FOR IDENT LASSIGN iterRange DO matchStmt
    ;

iterRange
    : simpleExp TO simpleExp iterRangeStmtPr
    ;

iterRangeStmtPr
    : BY simpleExp
    | {/*Addition to stop ambiguity*/} 
    ;

otherStmt
    : expStmt
    | returnStmt
    | breakStmt
    | compoundStmt
    ;

compoundStmt
    :  BEGIN localDecls stmtList END
    ;

localDecls
    : localDecls scopedVarDecl
    | {/*Epsilon*/} 
    ;

stmtList
    : stmtList stmt
    | {/*Epsilon*/} 
    ;

expStmt
    : exp ';'
    | ';' 
    ;

returnStmt
    : RETURN ';'
    | RETURN exp ';'
    ;

breakStmt
    : BREAK ';'
    ;

exp
    : mutExp
    | simpleExp
    ;

mutExp
    : mutable assignop exp
    | mutable DPLUS
    | mutable DMINUS
    ;

assignop
    : LASSIGN | PLUSEQ | MINUSEQ | TIMEEQ | DIVEQ
    ;

simpleExp
    : simpleExp OR andExp
    | andExp
    ;

andExp
    : andExp AND unaryRelExp
    | unaryRelExp
    ;

unaryRelExp
    : NOT unaryRelExp
    | relExp
    ; 

relExp
    : sumExp relop sumExp
    | sumExp
    ;

relop
    : '<' | '<' '=' | '>' | '>' '=' | '=' | NOTEQ
    ;

sumExp
    : sumExp sumop mulExp
    | mulExp
    ;
    
sumop
    : '+' | '-'
    ;


mulExp
    : mulExp mulop unaryExp  
    | unaryExp
    ;

mulop
    : '*' | '/' | '%'
    ;

unaryExp
    : unaryop unaryExp 
    | factor
    ;

unaryop
    : '-' | '*' | '?'
    ;

factor
    : mutable 
    | immutable
    ;

mutable
    : IDENT 
    | IDENT '[' exp ']'
    ;

immutable
    : '(' exp ')'
    | call
    | constant
    ;

call
    : IDENT '(' args ')'
    ;

args
    : argList
    | {/*Epsilon*/} 
    ;

argList
    : argList ',' exp
    | exp 
    ;

constant
    : NUMCONST | STRINGCONST | CHARCONST | BOOLCONST
    ;

%%

/*
 * Entry point: parses the file named by the first command-line argument,
 * or standard input when no argument is given.
 *
 * Returns the result of yyparse() (0 on success), or 1 when the input file
 * cannot be opened.
 */
int main(int argc, char *argv[])
{
    FILE *fp = NULL;
    int status;

    if (argc > 1) {
        fp = fopen(argv[1], "r");
        if (fp == NULL) {
            /* A NULL yyin would make the scanner silently read stdin instead. */
            perror(argv[1]);
            return 1;
        }
        yyin = fp;
    } else {
        yyin = stdin;
    }

    status = yyparse();

    /* Only close what was opened here; stdin is left alone. */
    if (fp != NULL) {
        fclose(fp);
    }
    return status;
}

/*
 * Error callback invoked by the generated parser: writes the supplied
 * message, wrapped in double quotes, as one line on standard output.
 */
void yyerror(char* s)
{
    fprintf(stdout, "yyerror: \"%s\"\n", s);
}

编辑: ScanType.h

/*
 * scanType.h
 * Declares struct TokenData, the record the scanner attaches to yylval.token
 * for identifiers and constants so the parser can see what was read.
 */
#ifndef TOKNDATA_H
#define TOKNDATA_H __DATE__" "__TIME__

/*
 * tokenclass values used by the scanner rules: 1 = identifier, 2 = number,
 * 3 = string, 4 = character, 5 = boolean. Which of cvalue/nvalue/svalue is
 * meaningful depends on the rule that filled the record.
 */
struct TokenData {
    int tokenclass; // token class
    int linenum; // line where found
    char *tokenstr; // what string was actually read
    char cvalue; // any character value
    int nvalue; // any numeric value or Boolean value
    char *svalue; // any string value e.g. an id
} * useToken; /* NOTE(review): this defines a global pointer in every file that
                 includes the header (both the scanner and the parser do);
                 nothing shown here uses it. Consider an extern declaration
                 here plus a single definition in one .c file. */

#endif /*TOKNDATA_H*/

编辑 2:

交换标记在 bison 文件中的位置意味着旧标记也未声明。

像这样改变顺序后

%token <token> BEGIN END IF THEN ELSE WHILE DO FOR TO BY RETURN BREAK OR AND NOT STATIC BOOL CHAR INT 
%token <token> DPLUS DMINUS LASSIGN PLUSEQ MINUSEQ TIMEEQ DIVEQ NOTEQ
%token <token> NUMCONST STRINGCONST IDENT CHARCONST BOOLCONST

我收到以下错误日志。

cScan.l:44:10: error: ‘STATIC’ undeclared (first use in this function)
 static  { return STATIC; }
          ^
cScan.l:45:10: error: ‘BOOL’ undeclared (first use in this function)
 bool    { return BOOL; }
          ^
cScan.l:46:10: error: ‘CHAR’ undeclared (first use in this function)
 char    { return CHAR; }
          ^
cScan.l:47:10: error: ‘INT’ undeclared (first use in this function)
 int     { return INT; }
          ^
cScan.l:48:15: error: expected expression before ‘;’ token
 begin    { return BEGIN;}
               ^
cScan.l:49:10: error: ‘END’ undeclared (first use in this function)
 end    { return END;}
          ^
cScan.l:50:10: error: ‘IF’ undeclared (first use in this function)
 if    { return IF;}
          ^
cScan.l:51:10: error: ‘THEN’ undeclared (first use in this function)
 then    { return THEN;}
          ^
cScan.l:52:10: error: ‘ELSE’ undeclared (first use in this function)
 else    { return ELSE;}
          ^
cScan.l:53:10: error: ‘WHILE’ undeclared (first use in this function)
 while    { return WHILE;}
          ^
cScan.l:54:10: error: ‘DO’ undeclared (first use in this function)
 do    { return DO;}
          ^
cScan.l:55:10: error: ‘FOR’ undeclared (first use in this function)
 for    { return FOR;}
          ^
cScan.l:56:10: error: ‘TO’ undeclared (first use in this function)
 to    { return TO;}
          ^
cScan.l:57:10: error: ‘BY’ undeclared (first use in this function)
 by    { return BY;}
          ^
cScan.l:58:10: error: ‘RETURN’ undeclared (first use in this function)
 return    { return RETURN;}
          ^
cScan.l:59:10: error: ‘BREAK’ undeclared (first use in this function)
 break    { return BREAK;}
          ^
cScan.l:60:10: error: ‘OR’ undeclared (first use in this function)
 or    { return OR; }
          ^
cScan.l:61:10: error: ‘AND’ undeclared (first use in this function)
 and    { return AND; }
          ^
cScan.l:62:10: error: ‘NOT’ undeclared (first use in this function)
 not { return NOT;}
          ^
cScan.l:64:10: error: ‘DPLUS’ undeclared (first use in this function)
 "++" { return DPLUS; }
          ^
cScan.l:65:10: error: ‘DMINUS’ undeclared (first use in this function)
 "--" { return DMINUS; }
          ^
cScan.l:66:10: error: ‘LASSIGN’ undeclared (first use in this function)
 "<-" { return LASSIGN; }
          ^
cScan.l:67:10: error: ‘PLUSEQ’ undeclared (first use in this function)
 "+=" { return PLUSEQ; }
          ^
cScan.l:68:10: error: ‘MINUSEQ’ undeclared (first use in this function)
 "-=" { return MINUSEQ; }
          ^
cScan.l:69:10: error: ‘TIMEEQ’ undeclared (first use in this function)
 "*=" { return TIMEEQ; }
          ^
cScan.l:70:10: error: ‘DIVEQ’ undeclared (first use in this function)
 "/=" { return DIVEQ; }
          ^
cScan.l:71:10: error: ‘NOTEQ’ undeclared (first use in this function)
 "!=" { return NOTEQ; }
          ^
cScan.l:80:12: error: ‘IDENT’ undeclared (first use in this function)
     return IDENT;
            ^
cScan.l:90:12: error: ‘NUMCONST’ undeclared (first use in this function)
     return NUMCONST;
            ^
cScan.l:101:12: error: ‘STRINGCONST’ undeclared (first use in this function)
     return STRINGCONST;
            ^
cScan.l:112:12: error: ‘CHARCONST’ undeclared (first use in this function)
     return CHARCONST;

撤消此更改后,旧标记又恢复正常了。

不能用 BEGIN 作为 token 名,因为 token 名会被当作 C 语言中的值(枚举常量)使用,而 BEGIN 是 flex 定义的宏(用来切换起始状态)。

这会导致您在问题中引用的 enum 声明出现语法错误,结果是 BEGIN 之后的所有枚举成员都未声明。但最重要的错误消息,是指出枚举声明本身存在语法错误的那一条:

lex.yy.c:117:15: error: expected identifier before ‘(’ token
 #define BEGIN (yy_start) = 1 + 2 *
               ^
cScan.tab.h:62:5: note: in expansion of macro ‘BEGIN’
     BEGIN = 263,                   /* BEGIN  */
     ^~~~~

出于某种原因,您在问题中省略了这条消息。

这同样适用于任何宏,包括您所包含的系统库头文件中定义的宏。我通常更喜欢给标记名称加上类似 T_ 的前缀,然后使用 bison 别名让语法看起来更漂亮:

%token T_BEGIN "begin"
       T_END   "end"
// ...
%%
// ...
compoundStmt
    :  "begin" localDecls stmtList "end"

顺便说一下,如果您真的使用了其中的数据,您的 struct TokenData 将导致未定义行为。(实际上您并不需要这些数据:Bison 提供了许多调试机制,不需要您费这么多功夫。)

举个例子,考虑

 {BOOLCONST} {
    struct TokenData boolToken;
    yylval.token = &boolToken;
    yylval.token->tokenclass = 5;
    yylval.token->linenum = yylineno;
    yylval.token->tokenstr = yytext;
    if(yytext[0] == 't') { 
        yylval.token->nvalue = 1;
    } else {
        yylval.token->nvalue = 0;
    }
    return BOOLCONST;
} 

boolToken 是一个自动(“局部”)变量,因此它的生命周期在 return BOOLCONST 执行时结束。存储在 yylval 中的地址(yylval.token = &boolToken;)是一个悬空指针,一旦 yylex 返回,yylval.token 所指向的内容就完全无法预测。此外,即使该内存区域的内容碰巧仍然完好无损,您存储的其他指针之一:

yylval.token->tokenstr = yytext;

是指向 Flex 内部输入缓冲区的指针,其内容会在下次调用 yylex 时被修改(这几乎肯定发生在 BOOLCONST 的语义值被使用之前,因为 bison 生成的解析器通常会预读一个标记)。