使用此语法在 flex/bison 中出现语法错误

Question

我正在尝试为编译器生成中间代码。我在 macOS 上使用 flex-bison。当我尝试输入时出现语法错误，我不知道该语法错误从何而来。

这是我的词法：

%{
#include<stdbool.h>
#include <stdlib.h>
#include <string.h>
//#include "LinkedList.h"
#include "parser.tab.h"
char* tempString;
%}
%option yylineno

/* whole: the single digit 0, or a digit string that does not start with 0. */
whole   0|[1-9][0-9]*
/* real: {whole}, a dot, then a fraction that is either exactly one digit
   or two or more digits whose last digit is non-zero. */
real    {whole}\.(([0-9])|([0-9][0-9]*[1-9]))
/* divisionop: the '/' character. */
divisionop [/]
/* noSymbol: any single character other than space, tab, newline and the
   punctuation characters / + - * : ; , ( ) . */
noSymbol    [^ \t\n\/\+\-\*\:\;\,\(\)\.]
/* error: a run of one or more noSymbol characters; used by the fallback
   rule near the end of the rules section. */
error   {noSymbol}+

%%


[ \t\n\r]+  ;



\#[-+]?{whole}/[\ :\,\;\+\-\/\*\)\n\r]|".GT."|".GE."|".EQ."|".NE."|".LT."|".LE."|":="   {
    /*
     * Integer literal: '#', an optional sign, then {whole}.  The part of
     * the pattern after '/' is trailing context: the literal is only
     * recognised when it is followed by one of the listed delimiter or
     * arithmetic characters, a relational operator, or ":=", and that
     * following text is not consumed.
     *
     * The digits (without the leading '#') start at yytext + 1.  No copy
     * is made here: the earlier malloc'd copy was never used or freed, so
     * every literal leaked a buffer.
     */
    return INTEGER;
}


\#[-+]?{real}/[\ :\,\;\+\-\/\*\)\n\r]|".GT."|".GE."|".EQ."|".NE."|".LT."|".LE."|":="    {
    /*
     * Real literal: '#', an optional sign, then {real}.  The part of the
     * pattern after '/' is trailing context: the literal is only
     * recognised when it is followed by one of the listed delimiter or
     * arithmetic characters, a relational operator, or ":=", and that
     * following text is not consumed.
     *
     * The digits (without the leading '#') start at yytext + 1.  No copy
     * is made here: the earlier malloc'd copy was never used or freed, so
     * every literal leaked a buffer.
     */
    return REALNUM;
}

[a-zA-Z][0-9][0-9a-zA-Z]*   {
    /*
     * Identifier: one letter, one digit, then any mix of letters/digits.
     * The lexeme is handed to the parser through the global tempString.
     *
     * The buffer needs strlen(yytext) + 1 bytes: strcpy also writes the
     * terminating NUL, so the previous malloc(strlen(yytext)) overflowed
     * the allocation by one byte.
     *
     * The previous tempString is not freed here because the parser passes
     * it on to other routines that may keep the pointer.
     */
    tempString = malloc(strlen(yytext) + 1);
    if (tempString == NULL) {
        perror("malloc");
        exit(EXIT_FAILURE);
    }
    strcpy(tempString, yytext);
    return IDENTIFIER;
}

Program {
    //printf("Found program :%s\n",yytext);
    return PROGRAM;

}

Int {
    //printf("Found int :%s\n",yytext);
    return INT;

}

Real    {
    //printf("Found real :%s\n",yytext);
    return REAL;

}

Bool    {
    //printf("Found bool :%s\n",yytext);
    return BOOL;

}

Procedure   {
    //printf("Found real :%s\n",yytext);
    return PROCEDURE;

}

Function    {
    //printf("Found function :%s\n",yytext);
    return FUNCTION;

}

Begin   {
    //printf("Found begin :%s\n",yytext);
    return BEGINN;

}

End {
    //printf("Found end :%s\n",yytext);
    return END;

}

If  {
    //printf("Found if :%s\n",yytext);
    return IF;
}

Then {
    //printf("Found then :%s\n",yytext);
    return THEN;

}

Else {
    //printf("Found else :%s\n",yytext);
    return ELSE;
}

While {
    //printf("Found while :%s\n",yytext);
    return WHILE;
}

Do {
    //printf("Found do :%s\n",yytext);
    return DO;
}

For {
    //printf("Found for :%s\n",yytext);
    return FOR;
}

To {
    //printf("Found to :%s\n",yytext);
    return TO;
}

Downto {
    //printf("Found downto :%s\n",yytext);
    return DOWNTO;
}

Case {
    //printf("Found case :%s\n",yytext);
    return CASE;
}

Return {
    //printf("Found return :%s\n",yytext);
    return RETURN;
}

"And Then" {
    //printf("Found andthen :%s\n",yytext);
    //push(yytext,ANDTHEN,yylineno);
    return ANDTHEN;
}

"Or Else" {
    //printf("Found andthen :%s\n",yytext);
    //push(yytext,ORELSE,yylineno);
    return ORELSE;
}

"+" {
    //printf("Found plus :%s\n",yytext);
    return PLUS;
}

\-  {
    //printf("Found minus :%s\n",yytext);
    return MINUS;

}

\*  {
    //printf("Found multiply :%s\n",yytext);
    return MULTIPLY;
}

{divisionop}    {
    //printf("Found division :%s\n",yytext);
    //push(yytext,DIVISION,yylineno);
    return DIVISION;
}

".GT."  {
    //printf("Found greaterthan :%s\n",yytext);
    return GREATERTHAN;
}

".GE."  {
    //printf("Found greaterequal :%s\n",yytext);
    return GREATEREQUAL;
}

\.NE\.  {
    //printf("Found notequal :%s\n",yytext);
    return NOTEQUAL;
}

\.EQ\.  {
    //printf("Found EQUAL :%s\n",yytext);
    return EQUAL;
}

\.LT\.  {
    //printf("Found lessthan :%s\n",yytext);
    return LESSTHAN;
}

\.LE\.  {
    //printf("Found lessequal :%s\n",yytext);
    return LESSEQUAL;
}

[,] {
    //printf("Found comma :%s\n",yytext);
    return COMMA;
}

":="    {
    //printf("Found declare :%s\n",yytext);
    return DECLARE;
}

[:] {
    //printf("Found semicolon :%s\n",yytext);
    return COLON;
}

[;] {
    //printf("Found semicolon :%s\n",yytext);
    return SEMICOLON;
}


\( {
    //printf("Found oppar :%s\n",yytext);

    return OPPAR;

}

\)  {
    //printf("Found cppar :%s\n",yytext);
    return CPAR;
}

False   {
    //printf("Found false :%s\n",yytext);
    return FALSE;
}

True    {
    //printf("Found true :%s\n",yytext);
    return TRUE;
}

{error} {
    /* Fallback for a run of non-symbol characters that no earlier rule
       claimed. The text is reported on stdout; there is no return
       statement, so no token reaches the parser for it. */
    printf("Error on : **<<  %s  >>**\n", yytext);
    //yymore();
}
\.  {
    /* A lone '.' that is not part of a relational operator or a real
       literal. Reported on stdout; no token is returned. */
    printf("Error on : **<<  illegal use of \" %s \" >>**\n", yytext);
    //yymore();
}

%%


/*
 * Called by the scanner when it reaches end of input.
 * Returning 1 tells it there is no further input to switch to.
 * Declared with (void) so the definition is a real prototype.
 */
int yywrap(void){
    return 1;
}

//int main(){
//  yyin = fopen("input2P.txt", "r");
//  initLinkedList(table);
//  while(yylex());
//  printLinkedList();
//  return 0;
//}

这是我的parser.y

%{
#include<stdio.h>
#include "stack.h"
//#include "LinkedList.h"
//#include "symbol_table.h"

int currentType;
char* returnType;

extern FILE* yyin;
extern char* tempString;
struct Stack* tblptrStack;

/*
 * Create a fresh temporary name of the form "t<num>", register it in the
 * symbol table together with `c`, and return it in a newly malloc'd
 * 100-byte buffer.
 *
 * NOTE(review): this body uses `string`, `to_string` and `.c_str()`, which
 * look like C++ std::string usage, while the rest of the parser is plain C
 * -- confirm which language this file is meant to be built as.
 * NOTE(review): `num`, `symbol_table_insert` and `symbol_table_lookup` are
 * not declared anywhere in the visible source.
 */
char* new_temp(char *c) {
    /* Build the name "t" followed by the current value of num. */
    string name("t");
    name += to_string(num);
    num++;
    /* Fixed 100-byte heap buffer; it is the return value. */
    char *what = (char *) malloc(sizeof(char) * 100);
    strcpy(what, name.c_str());
    symbol_table_insert(what, c);
    /* Overwrite the buffer with the id of the entry found by the lookup. */
    strcpy(what, symbol_table_lookup(what).id.c_str());
    return what;
}


int yylex();
void yyerror(char* error);
struct SymbolTable* secondArg(struct SymbolTable* a,struct SymbolTable* b );
%}

/* Semantic value types. The stray extra '}' that followed this
   declaration has been removed; it made the grammar file invalid. */
%union{
  char *str;
   /* Attributes carried by expressions (declared for `exp` via %type). */
   struct {
        int quad;
        int is_boolean;
        char *place;   /* set by the `exp PLUS exp` action via new_temp() */
        char *code;
        char *type;    /* copied from the left operand in `exp PLUS exp` */
   } eval;
}

%start program
%token INTEGER
%token ZERO
%token REALNUM
%token PROGRAM
%token INT
%token REAL
%token BOOL
%token PROCEDURE
%token FUNCTION
%token BEGINN
%token END
%token IF
%token THEN
%token ELSE
%token WHILE
%token DO
%token FOR
%token TO
%token DOWNTO
%token CASE
%token RETURN
%token ANDTHEN
%token ORELSE
%token IDENTIFIER
%token PLUS
%token MINUS
%token MULTIPLY
%token DIVISION
%token GREATERTHAN
%token GREATEREQUAL
%token NOTEQUAL
%token EQUAL
%token LESSTHAN
%token LESSEQUAL
%token COMMA
%token SEMICOLON
%token COLON
%token DECLARE
%token OPPAR
%token CPAR
%token FALSE
%token TRUE
%token ERROR
%type <eval> exp

%left COMMA
%left INT BOOL REAL

%left COLON
%left IF_PREC   
%left ELSE
%left ANDTHEN ORELSE
%left PLUS MINUS
%left MULTIPLY DIVISION
%left GREATERTHAN GREATEREQUAL NOTEQUAL EQUAL LESSTHAN LESSEQUAL

%%

/* program: header, optional declaration list, one block, closing ';'.
   Both alternatives pop the table stack and print the popped table. */
program
    : PROGRAM IDENTIFIER M SEMICOLON declist block SEMICOLON
        {
            printf("\nin program\n");
            allSymbolTablePrint(pop(tblptrStack));
        }
    | PROGRAM IDENTIFIER M SEMICOLON block SEMICOLON
        {
            printf("\nin program\n");
            allSymbolTablePrint(pop(tblptrStack));
        }
    ;

/* M: empty marker that creates the table named "program" (no parent)
   and pushes it on the table stack. */
M
    : /* empty */
        {
            struct SymbolTable *programTable = mkTable(NULL, "program");
            push(programTable, tblptrStack);
        }
    ;

/* declist: one or more declarations. */
declist
    : dec
    | declist dec
    ;

/* dec: a variable, procedure or function declaration. */
dec
    : vardec
        {
            printf("var deccc");
        }
    | procdec
    | funcdec
    ;
/*
 * type: records the declared type in two globals.
 *   currentType - numeric code later passed to enterVar()
 *                 (4 for Int, 8 for Real, 1 for Bool; presumably a size
 *                 in bytes -- TODO confirm).
 *   returnType  - heap copy of the type name, later passed to
 *                 enterProcFunc() by funcdec.
 *
 * Each buffer is strlen(name) + 1 bytes: strcpy also writes the
 * terminating NUL, so the previous malloc(strlen(name)) overflowed the
 * allocation by one byte.
 */
type : 
    INT {
        currentType=4;
        returnType = (char*) malloc(strlen("INT") + 1);
        strcpy(returnType, "INT");
        printf("this is int");
    }
    | REAL {
        currentType=8;
        returnType = (char*) malloc(strlen("REAL") + 1);
        strcpy(returnType, "REAL");
    }
    |BOOL {
        currentType=1;
        returnType = (char*) malloc(strlen("BOOL") + 1);
        strcpy(returnType, "BOOL");
    }
    ;
/* iddec: one declared name, optionally followed by ":=" and an
   initialiser. The name is entered into the table on top of the stack as
   soon as the IDENTIFIER has been seen (mid-rule action in the second
   alternative). */
iddec
    : IDENTIFIER
        {
            enterVar(tempString, currentType, 0, top(tblptrStack));
            printf("a variable entered:%s\n", tempString);
        }
    | IDENTIFIER
        {
            enterVar(tempString, currentType, 0, top(tblptrStack));
            printf("a variable entered:%s\n", tempString);
        }
      DECLARE
        {
            printf("this is declare eeee");
        }
      exp
    ;

/* idlist: comma-separated list of iddec. */
idlist
    : iddec
        {
            printf("this is idlist iddec");
        }
    | idlist COMMA iddec
    ;

/* vardec: a type, a list of names, and a terminating ';'. */
vardec
    : type idlist SEMICOLON
        {
            printf("vardec");
        }
    ;
/* procdec: procedure declaration, with or without local declarations.
   On completion the procedure's table is popped and registered in the
   table that is then on top of the stack. */
procdec
    : PROCEDURE IDENTIFIER NP OPPAR paramdecs CPAR declist block SEMICOLON
        {
            char *procName = top(tblptrStack)->name;
            struct SymbolTable *procTable = pop(tblptrStack);
            enterProcFunc(procName, 0, "NULL", procTable, top(tblptrStack));
        }
    | PROCEDURE IDENTIFIER NP OPPAR paramdecs CPAR block SEMICOLON
        {
            char *procName = top(tblptrStack)->name;
            struct SymbolTable *procTable = pop(tblptrStack);
            enterProcFunc(procName, 0, "NULL", procTable, top(tblptrStack));
        }
    ;

/* NP: empty marker that opens a new table named after tempString, with
   the current top table as its parent. */
NP
    : /* empty */
        {
            printf("inside NP\t tempString:%s\n", tempString);
            struct SymbolTable *procTable = mkTable(top(tblptrStack), tempString);
            push(procTable, tblptrStack);
        }
    ;

/* funcdec: function declaration. The mid-rule action runs right after
   the return type has been parsed, i.e. before the function's own
   declarations and block. */
funcdec
    : FUNCTION IDENTIFIER FN OPPAR paramdecs CPAR COLON type
        {
            char *funcName = top(tblptrStack)->name;
            struct SymbolTable *funcTable = pop(tblptrStack);
            enterProcFunc(funcName, -1, returnType, funcTable, top(tblptrStack));
        }
      declist block SEMICOLON
    | FUNCTION IDENTIFIER FN OPPAR paramdecs CPAR COLON type
        {
            char *funcName = top(tblptrStack)->name;
            struct SymbolTable *funcTable = pop(tblptrStack);
            enterProcFunc(funcName, -1, returnType, funcTable, top(tblptrStack));
        }
      block SEMICOLON
    ;

/* FN: empty marker that opens a new table named after tempString, with
   the current top table as its parent. */
FN
    : /* empty */
        {
            printf("inside FN\t tempString:%s\n", tempString);
            struct SymbolTable *funcTable = mkTable(top(tblptrStack), tempString);
            push(funcTable, tblptrStack);
        }
    ;
/* paramdecs: zero or more parameter groups separated by ';'. */
paramdecs
    : paramdec
        {
            printf("paramdecs");
        }
    | paramdecs SEMICOLON paramdec
        {
            printf("paramdecs\n");
        }
    | /* empty */
        {
            printf("paramdecs\n");
        }
    ;

/* paramdec: a type followed by the names that share it. */
paramdec
    : type paramlist
    ;

/* paramlist: comma-separated parameter names. */
paramlist
    : IDENTIFIER
    | paramlist COMMA IDENTIFIER
    ;

/* block: either Begin ... End around a statement list, or one statement. */
block
    : BEGINN stmtlist END
    | stmt
    ;

/* stmtlist: statements separated by ';'. The ';' is a separator, not a
   terminator, so no ';' is accepted directly before END. */
stmtlist
    : stmt
    | stmtlist SEMICOLON stmt
    ;

/* lvalue: an assignable name. */
lvalue
    : IDENTIFIER
    ;
/*
 * stmt: assignment, conditional, loops, case, return, or a bare
 * expression. The two IF alternatives carry explicit %prec markers
 * (IF_PREC and ELSE).
 *
 * Fix: the trace message in the last alternative was written as
 * printf(stmt exp), which is not valid C; it is now a string literal.
 */
stmt :
    lvalue DECLARE exp 
    | IF exp THEN block %prec IF_PREC
    | IF exp THEN block ELSE block %prec ELSE
    | WHILE exp DO block 
    | FOR lvalue DECLARE exp TO exp DO block 
    | FOR lvalue DECLARE exp DOWNTO exp DO block 
    | CASE exp caseelement END 
    | RETURN exp 
    | exp { printf("stmt exp"); }
    ;
/*
 * exp: expressions. Only the PLUS alternative generates code so far.
 *
 * Fix: the positional references in the PLUS action had lost their
 * "$1" / "$3" prefixes (leaving ".type" and ".place"), which does not
 * compile. They are restored here, and the final printf now uses a
 * literal format string instead of passing an operand's place as the
 * format.
 *
 * NOTE(review): the other alternatives do not assign $$.place or
 * $$.type, so the operand attributes read in the PLUS action are only
 * meaningful once those alternatives set them -- confirm before relying
 * on the printed output.
 * NOTE(review): the exact text of the emitted three-address line is an
 * assumption; adjust it to the intended intermediate-code format.
 */
exp : 
    exp ANDTHEN exp 
    | exp ORELSE exp  
    | exp PLUS exp   {
        printf("Rule  \t\t mathlogicExpression -> mathlogicExpression KW_PLUS mathlogicExpression\n");
        /* The result lives in a fresh temporary typed like the left operand. */
        $$.place = new_temp($1.type);
        $$.type = $1.type;
        printf("%s := %s + %s\n", $$.place, $1.place, $3.place);
    }
    | exp MINUS exp 
    | exp MULTIPLY exp { printf(" RULE FOR MULTIPLY\n");}
    | exp DIVISION exp 
    | OPPAR exp CPAR 
    | boolexp relop boolexp
    | INTEGER {printf("this is integerrrr");}
    | REALNUM 
    | TRUE
    | FALSE 
    | lvalue 
    | IDENTIFIER OPPAR explist CPAR 
    ;
/* boolexp: the operand forms allowed on either side of a relational
   operator. */
boolexp
    : OPPAR exp CPAR
    | INTEGER
    | REALNUM
    | TRUE
    | FALSE
    | lvalue
    | IDENTIFIER OPPAR explist CPAR
    ;

/* caseelement: one or more "INTEGER : block ;" arms. */
caseelement
    : INTEGER COLON block SEMICOLON
    | caseelement INTEGER COLON block SEMICOLON
    ;

/* explist: comma-separated argument expressions; may be empty. */
explist
    : exp
    | explist COMMA exp
    | /* empty */
    ;

/* relop: the six relational operators. */
relop
    : GREATERTHAN
    | GREATEREQUAL
    | NOTEQUAL
    | EQUAL
    | LESSTHAN
    | LESSEQUAL
    ;


%%

/*
 * Discard the table on top of stack `b`, then return the table that is
 * on top afterwards.
 *
 * Fix: the function is declared to return a pointer but previously fell
 * off the end without a return statement, so using its result was
 * undefined behaviour.
 */
struct SymbolTable* popTop(struct Stack* b ){
    pop(b);
    return top(b);
}

/*
 * Entry point: create the symbol-table stack, open the input file and
 * run the parser.
 *
 * If "input3.txt" cannot be opened the failure is reported and the
 * scanner reads from standard input instead. Previously yyin was left
 * NULL in that case and the fallback to stdin happened silently.
 */
int main(void){
    tblptrStack = createStack();
    if (tblptrStack == NULL) {
        fprintf(stderr, "Error : could not create the symbol-table stack\n");
        return 1;
    }
    tblptrStack->top = NULL;

    yyin = fopen("input3.txt", "r");
    if (yyin == NULL) {
        perror("input3.txt");
        yyin = stdin;
    }

    yyparse();

    /* Only close what this function opened. */
    if (yyin != stdin) {
        fclose(yyin);
    }
    return 0;
}

/* Parser error callback: write the parser's message to standard output,
   prefixed with "Error : " and followed by a newline. */
void yyerror(char* error){
    fprintf(stdout, "Error : %s\n", error);
}

我试过这个输入例如：

Program p1Main;

    Int i1, i2:=#-23;   
Begin
    i1 := i1 + i2;

End
;

我收到这个错误

a variable entered:i1
a variable entered:i2
Error : syntax error
logout
Saving session...
...copying shared history...
...saving history...truncating history files...
...completed.

我尝试在各处加入 printf 语句来找出这个语法错误的来源，但没有得到任何有用的结果。我看过其他类似的问题并尝试了其中的解决方案，但都没有奏效。如果你知道如何解决这个问题，请帮助我。

Answer 1

您收到语法错误，因为您的输入程序在语法上不正确，而解析器工作正常。

（修整过的）语法将程序显示为：

program:    PROGRAM IDENTIFIER M SEMICOLON declist block SEMICOLON

它显示一个块为：

block : 
    BEGINN stmtlist END
    | stmt 
    ;

随后的语句列表是：

stmtlist : 
    stmt 
    | stmtlist SEMICOLON stmt 
    ;

在这里，您会看到分号是语句分隔符而不是终止符。这意味着在 END 之前永远不能有分号。然而，您的输入却是这样写的：

 Begin
    i1 := i1 + i2;

End

所以你必须决定哪一个才是对这门语言的正确解释：你的语法，还是你的输入程序。初学者经常犯这个错误，而有经验的程序员或老师只需要几秒钟就能发现。那么，你怎样才能借助 bison 自己发现这个问题呢？我将在下面补充这部分细节...

你怎样才能自己找到这个问题？首先，遵守 Stack Overflow 的提问规则会是一个好的开始。您没有把代码示例缩减到说明问题所需的最小程度，只是把所有东西都扔进了问题里，然后就放弃了，把它交给了我们。

一个好学生可以怎样简化这个问题呢？您可以做的一件事是简化给解析器的测试输入，尝试越来越简单的输入程序。例如，问问自己 "does the error occur in the declarations or the block?"、"What happens if I use a simpler program with no declarations?"。通过这种推理，您应该能够推断出故障位于 block 中的某处。然后您可以针对 block 和语句列表中的不同规则分别尝试示例，看看哪些备选项有效、哪些无效，最终找到导致问题的规则。

然后您可以创建一个更小的语法、词法分析器和测试程序以粘贴到 Stack Overflow 中，它解释了失败的特定语法规则的特定问题，并在此时寻求帮助。然而，通常，在创建较小示例的过程中，问题和解决方案会自行暴露。

找出问题原因的一种更强大的、程序化的方法是使用 bison 内置的调试功能。Bison 有一个调试模式：运行 bison 时加上 -t 参数，并在程序运行时把 yydebug 变量设为非零值即可启用。这将生成详细的解析跟踪，准确显示导致错误的输入符号和规则。我将在下面的编辑中向您展示生成的内容...

好的。在解决了您粘贴的代码中的一些问题之后，我现在已经生成了所需的输出。您粘贴到问题中的代码本身是无效的：parser.y 的第 41 行有一个多余的“}”，我不得不将其删除；而且我们没有您的 stack.h，所以我不得不删除您所有的语义动作才能显示语法调试信息。无论如何，在这样做并启用 bison 调试之后，我们得到以下内容（在我的 Mac 上）：

briantompsett:~ cssbct$ gcc parser.tab.c lex.yy.c -ll -o parser
briantompsett:~ cssbct$ ./parser
Starting parse
Entering state 0
Reading a token: Program p1Main;

    Int i1, i2:=#-23;   
Begin
    i1 := i1 + i2;

End
;Next token is token PROGRAM ()
Shifting token PROGRAM ()
Entering state 1
Reading a token: Next token is token IDENTIFIER ()
Shifting token IDENTIFIER ()
Entering state 3
Reducing stack by rule 3 (line 108):
-> $$ = nterm M ()
Stack now 0 1 3
Entering state 5
Reading a token: Next token is token SEMICOLON ()
Shifting token SEMICOLON ()
Entering state 6
Reading a token: Next token is token INT ()
Shifting token INT ()
Entering state 9
Reducing stack by rule 9 (line 122):
    = token INT ()
-> $$ = nterm type ()
Stack now 0 1 3 5 6
Entering state 26
Reading a token: Next token is token IDENTIFIER ()
Shifting token IDENTIFIER ()
Entering state 50
Reading a token: Next token is token COMMA ()
Reducing stack by rule 12 (line 133):
    = token IDENTIFIER ()
-> $$ = nterm iddec ()
Stack now 0 1 3 5 6 26
Entering state 51
Reducing stack by rule 15 (line 139):
    = nterm iddec ()
-> $$ = nterm idlist ()
Stack now 0 1 3 5 6 26
Entering state 52
Next token is token COMMA ()
Shifting token COMMA ()
Entering state 82
Reading a token: Next token is token IDENTIFIER ()
Shifting token IDENTIFIER ()
Entering state 50
Reading a token: Next token is token DECLARE ()
Shifting token DECLARE ()
Entering state 81
Reducing stack by rule 13 (line 134):
-> $$ = nterm @1 ()
Stack now 0 1 3 5 6 26 52 82 50 81
Entering state 110
Reading a token: Next token is token INTEGER ()
Shifting token INTEGER ()
Entering state 7
Reading a token: Next token is token SEMICOLON ()
Reducing stack by rule 52 (line 206):
    = token INTEGER ()
-> $$ = nterm exp ()
Stack now 0 1 3 5 6 26 52 82 50 81 110
Entering state 124
Next token is token SEMICOLON ()
Reducing stack by rule 14 (line 134):
    = token IDENTIFIER ()
    = token DECLARE ()
    = nterm @1 ()
    = nterm exp ()
-> $$ = nterm iddec ()
Stack now 0 1 3 5 6 26 52 82
Entering state 111
Reducing stack by rule 16 (line 140):
    = nterm idlist ()
    = token COMMA ()
    = nterm iddec ()
-> $$ = nterm idlist ()
Stack now 0 1 3 5 6 26
Entering state 52
Next token is token SEMICOLON ()
Shifting token SEMICOLON ()
Entering state 83
Reducing stack by rule 17 (line 143):
    = nterm type ()
    = nterm idlist ()
    = token SEMICOLON ()
-> $$ = nterm vardec ()
Stack now 0 1 3 5 6
Entering state 27
Reducing stack by rule 6 (line 117):
    = nterm vardec ()
-> $$ = nterm dec ()
Stack now 0 1 3 5 6
Entering state 25
Reducing stack by rule 4 (line 113):
    = nterm dec ()
-> $$ = nterm declist ()
Stack now 0 1 3 5 6
Entering state 24
Reading a token: this is idlist iddecthis is declare eeeethis is integerrrrvardecvar decccNext token is token BEGINN ()
Shifting token BEGINN ()
Entering state 14
Reading a token: Next token is token IDENTIFIER ()
Shifting token IDENTIFIER ()
Entering state 20
Reading a token: Next token is token DECLARE ()
Reducing stack by rule 34 (line 184):
    = token IDENTIFIER ()
-> $$ = nterm lvalue ()
Stack now 0 1 3 5 6 24 14
Entering state 31
Next token is token DECLARE ()
Shifting token DECLARE ()
Entering state 54
Reading a token: Next token is token IDENTIFIER ()
Shifting token IDENTIFIER ()
Entering state 20
Reading a token: Next token is token PLUS ()
Reducing stack by rule 34 (line 184):
    = token IDENTIFIER ()
-> $$ = nterm lvalue ()
Stack now 0 1 3 5 6 24 14 31 54
Entering state 39
Next token is token PLUS ()
Reducing stack by rule 56 (line 210):
    = nterm lvalue ()
-> $$ = nterm exp ()
Stack now 0 1 3 5 6 24 14 31 54
Entering state 84
Next token is token PLUS ()
Shifting token PLUS ()
Entering state 57
Reading a token: Next token is token IDENTIFIER ()
Shifting token IDENTIFIER ()
Entering state 20
Reading a token: Next token is token SEMICOLON ()
Reducing stack by rule 34 (line 184):
    = token IDENTIFIER ()
-> $$ = nterm lvalue ()
Stack now 0 1 3 5 6 24 14 31 54 84 57
Entering state 39
Next token is token SEMICOLON ()
Reducing stack by rule 56 (line 210):
    = nterm lvalue ()
-> $$ = nterm exp ()
Stack now 0 1 3 5 6 24 14 31 54 84 57
Entering state 87
Next token is token SEMICOLON ()
Reducing stack by rule 46 (line 200):
    = nterm exp ()
    = token PLUS ()
    = nterm exp ()
-> $$ = nterm exp ()
Stack now 0 1 3 5 6 24 14 31 54
Entering state 84
Next token is token SEMICOLON ()
Reducing stack by rule 35 (line 187):
    = nterm lvalue ()
    = token DECLARE ()
    = nterm exp ()
-> $$ = nterm stmt ()
Stack now 0 1 3 5 6 24 14
Entering state 38
Reducing stack by rule 32 (line 180):
    = nterm stmt ()
-> $$ = nterm stmtlist ()
Stack now 0 1 3 5 6 24 14
Entering state 37
Next token is token SEMICOLON ()
Shifting token SEMICOLON ()
Entering state 71
Reading a token: Next token is token END ()
Error : syntax error
Error: popping token SEMICOLON ()
Stack now 0 1 3 5 6 24 14 37
Error: popping nterm stmtlist ()
Stack now 0 1 3 5 6 24 14
Error: popping token BEGINN ()
Stack now 0 1 3 5 6 24
Error: popping nterm declist ()
Stack now 0 1 3 5 6
Error: popping token SEMICOLON ()
Stack now 0 1 3 5
Error: popping nterm M ()
Stack now 0 1 3
Error: popping token IDENTIFIER ()
Stack now 0 1
Error: popping token PROGRAM ()
Stack now 0
Cleanup: discarding lookahead token END ()
Stack now 0

您会看到它现在停在 END 之前的分号处，如跟踪的这一部分所示：

Next token is token SEMICOLON ()
Shifting token SEMICOLON ()
Entering state 71
Reading a token: Next token is token END ()
Error : syntax error
Error: popping token SEMICOLON ()

这就是你以后调试的方式。

使用此语法在 flex/bison 中出现语法错误

Getting Syntax Error in flex/bison with this grammar

c

compiler-construction

grammar

bison

flex-lexer