如何调试我的 flex/bison 语法?

How can I debug my flex/bison grammar?

这可能是一个很傻的问题。据我所知,语法规则没有错误,但解析器没有给出正确的输出。我盯着它看了很久,却找不到错误所在。

我可以使用哪些工具来帮助我了解解析过程中发生了什么?我尝试过插入跟踪代码,这花了很多功夫,但似乎对我帮助不大。

parser.y

%{
#include<stdio.h>
#include<stdlib.h>  
#include<string.h>
#include "SymbolTable.h"
#include "SymbolInfo.h"
#include "ScopeTable.h"

int yyparse(void);
int yylex(void);
extern char* yytext;
extern FILE * yyin;
extern int tableSize;

FILE *logout;
extern int line_count;
extern char *arr[100];
extern char *final_arr[100];

SymbolTable *table;

/* Error callback for the generated parser: writes the message s,
 * followed by a newline, to stderr. */
void yyerror (const char *s)
{
    fputs(s, stderr);
    fputc('\n', stderr);
}

%}

%union {
    class SymbolInfo* sym;
    char *s;
    float f;
}

%error-verbose
%verbose
%token COMMA INT ID SEMICOLON FLOAT VOID LCURL RCURL RETURN NOT IF FOR WHILE PRINTLN LPAREN RPAREN
%token CONST_INT CONST_FLOAT LTHIRD RTHIRD 
%token ADDOP MULOP INCOP DECOP RELOP LOGICOP ASSIGNOP

%token <f> DOUBLE
//%expect 1

%precedence THEN
%precedence ELSE

%left "<" ">" "<=" ">=" "=" "!="
%left "+" "-"
%left "*" "/"
%left UMINUS 


%%

start : program     {   printf("start -> program\n");
                        fprintf(logout,"%d : start ->  program\n",line_count);
                    }
      ;

program : program unit {
                            printf("program -> program unit\n");
                            fprintf(logout,"%d : program -> program unit\n\n",line_count);
                            for(int j = 0; final_arr[j] != NULL; j++)
                            {
                                fprintf(logout,"%s",final_arr[j]);
                            }
                                fprintf(logout,"\n\n");
                        }
        | unit          {
                            printf("program -> unit\n");
                            fprintf(logout,"%d : program -> unit\n\n",line_count);
                            for(int j = 0; final_arr[j] != NULL; j++)
                            {
                                fprintf(logout,"%s",final_arr[j]);
                            }
                                fprintf(logout,"\n\n");

                        }
        ;

unit : var_dec  {
                    printf("unit -> var_dec\n");
                    fprintf(logout,"%d : unit -> var_dec\n\n",line_count);
                    for(int j = 0; arr[j] != NULL; j++)
                        {
                            fprintf(logout,"%s",arr[j]);
                        }
                    fprintf(logout,"\n\n");

                }
                |func_declaration {

                fprintf(logout,"%d : unit -> func_declaration\n\n",line_count);
                    for(int j = 0; arr[j] != NULL; j++)
                        {
                            fprintf(logout,"%s",arr[j]);
                        }
                    fprintf(logout,"\n\n");
                }
                |func_definition {

                fprintf(logout,"%d : unit -> func_definition\n\n",line_count);
                    for(int j = 0; arr[j] != NULL; j++)
                        {
                            fprintf(logout,"%s",arr[j]);
                        }
                    fprintf(logout,"\n\n");

                }
                ;

     ;

/* func_declaration: a function prototype, with or without a parameter list.
 * Each action echoes the reduced production to stdout and to logout, then
 * writes every string currently stored in arr (up to the first NULL). */
func_declaration : type_specifier ID LPAREN parameter_list RPAREN SEMICOLON     {

                printf("func_declaration -> type_specifier id LPAREN parameter_list RPAREN SEMICOLON\n");
                fprintf(logout,"%d : func_declaration : type_specifier ID LPAREN parameter_list RPAREN SEMICOLON\n\n", line_count);
                for(int j = 0; arr[j] != NULL; j++)
                        {
                            fprintf(logout,"%s",arr[j]);
                        }
                    fprintf(logout,"\n\n");

        }
        | type_specifier ID LPAREN RPAREN SEMICOLON {
                printf("func_declaration -> type_specifier id LPAREN RPAREN SEMICOLON\n");
                /* Log the production that was actually reduced: this
                 * alternative has no parameter_list. */
                fprintf(logout,"%d : func_declaration : type_specifier ID LPAREN RPAREN SEMICOLON\n\n", line_count);

                for(int j = 0; arr[j] != NULL; j++)
                        {
                            fprintf(logout,"%s",arr[j]);
                        }
                    fprintf(logout,"\n\n");


        }
        ;

func_definition : type_specifier ID LPAREN parameter_list RPAREN compound_statement {
                printf("func_definition -> type_specifier ID LPAREN parameter_list RPAREN compound_statement\n");
                fprintf(logout,"%d : func_definition : type_specifier ID LPAREN parameter_list RPAREN compound_statement\n\n", line_count); 

        }
        | type_specifier ID LPAREN RPAREN compound_statement {
                printf("func_definition -> type_specifier id LPAREN RPAREN compound_statement\n");
                fprintf(logout,"%d : func_definition : type_specifier ID LPAREN RPAREN compound_statement\n\n", line_count);    

        }
        ;               


parameter_list  : parameter_list COMMA type_specifier ID {

                printf("parameter_list -> parameter_list COMMA type_specifier ID\n");
                fprintf(logout,"%d : parameter_list  : parameter_list COMMA type_specifier ID\n\n", line_count);    
                for(int j = 0; arr[j] != NULL; j++)
                        {
                            fprintf(logout,"%s",arr[j]);
                        }
                    fprintf(logout,"\n\n");

        }
        | parameter_list COMMA type_specifier {
                printf("parameter_list -> parameter_list COMMA type_specifier\n");
                fprintf(logout,"%d : parameter_list  : parameter_list COMMA type_specifier\n\n", line_count);   

        }
        | type_specifier ID {
                printf("parameter_list -> type_specifier ID\n");
                fprintf(logout,"%d : parameter_list : type_specifier ID\n\n", line_count);  
                for(int j = 0; arr[j] != NULL; j++)
                        {
                            fprintf(logout,"%s",arr[j]);
                        }
                    fprintf(logout,"\n\n");
        }
        | type_specifier {
                printf("parameter_list -> type_specifier\n");
                fprintf(logout,"%d :  parameter_list : type_specifier \n\n", line_count);   

        }
        ;


compound_statement : LCURL statements RCURL {
    printf("compound_statement -> LCURL statements RCURL\n");
    fprintf(logout,"compound_statement : LCURL statements RCURL\n\n");
}
            | LCURL RCURL
            ;

var_dec: type_specifier declaration_list SEMICOLON {

                    printf("var_dec -> type_specifier declaration_list SEMICOLON \n");
                    fprintf(logout,"%d : var_dec: type_specifier declaration_list SEMICOLON \n\n", line_count);

                    for(int j = 0; arr[j] != NULL; j++)
                        {
                            fprintf(logout,"%s",arr[j]);
                        }
                    fprintf(logout,"\n\n");

            }
        ;            

type_specifier : INT    {printf("type_specifier -> INT\n");
                            fprintf(logout,"%d : type_specifier-> INT\n\n%s\n\n", line_count,yytext);
                        }
               | FLOAT  {printf("type_specifier ->FLOAT\n");
                            fprintf(logout,"%d : type_specifier-> FLOAT\n\n%s\n\n",line_count, yytext);

                        }
               | VOID   {printf("type_specifier -> VOID\n");
                            fprintf(logout,"%d : type_specifier-> VOID\n\n%s\n\n",line_count, yytext);

                         }
               ;        

declaration_list : declaration_list COMMA ID {

                        printf("declaration_list -> declaration_list COMMA ID\n");  
                        fprintf(logout,"%d : declaration_list -> declaration_list COMMA ID\n\n",line_count);
                        for(int j = 1; arr[j+1] != NULL; j++)
                        {
                            fprintf(logout,"%s",arr[j]);
                        }
                            fprintf(logout,"\n\n");
                       }
                 | declaration_list COMMA ID LTHIRD CONST_INT RTHIRD {

                        printf("declaration_list -> declaration_list COMMA ID LTHIRD CONST_INT RTHIRD\n");      
                        fprintf(logout,"%d : declaration_list -> declaration_list COMMA ID LTHIRD CONST_INT RTHIRD\n",line_count);
                        for(int j = 1; arr[j+1] != NULL; j++)
                        {
                            fprintf(logout,"%s",arr[j]);
                        }
                            fprintf(logout,"\n\n");

                        }
                 |ID    {
                        printf("declaration_list -> ID\n");
                        fprintf(logout,"%d : declaration_list -> ID\n\n",line_count);
                        for(int j = 1; arr[j+1] != NULL; j++)
                        {
                            fprintf(logout,"%s",arr[j]);
                        }
                            fprintf(logout,"\n\n");
                        }
                 |ID LTHIRD CONST_INT RTHIRD {

                        printf("declaration_list -> ID LTHIRD CONST_INT RTHIRD\n"); 
                        fprintf(logout,"%d : declaration_list -> ID LTHIRD CONST_INT RTHIRD\n",line_count);
                        for(int j = 1; arr[j+1] != NULL; j++)
                        {
                            fprintf(logout,"%s",arr[j]);
                        }
                            fprintf(logout,"\n\n");

                        }
                 ;  

statements : statement {
    printf("statements -> statement\n");
    fprintf(logout,"%d : statements : statement\n\n",line_count);
    fprintf(logout, "%s\n\n",yytext);
}
       | statements statement
       ;

statement : var_dec
      | expression_statement
      | compound_statement
      | FOR LPAREN expression_statement expression_statement expression RPAREN statement
      | IF LPAREN expression RPAREN statement
      | WHILE LPAREN expression RPAREN statement
      | PRINTLN LPAREN ID RPAREN SEMICOLON
      | RETURN expression SEMICOLON  {
            printf("statement -> RETURN expression SEMICOLON\n");
            fprintf(logout,"%d : statement : RETURN expression SEMICOLON\n\n",line_count);
            fprintf(logout, "%s\n\n",yytext);
      }
      ;

expression_statement    : SEMICOLON         
            | expression SEMICOLON 
            ;

variable : ID   {
                    printf("variable -> ID\n");
                    fprintf(logout,"%d : variable : ID\n\n",line_count);
                    fprintf(logout, "%s\n\n",yytext);
}   
     | ID LTHIRD expression RTHIRD 
     ;

 expression : logic_expression  {
        printf("expression -> logic_expression\n");
        fprintf(logout,"%d : expression : logic_expression\n\n",line_count);
        fprintf(logout, "%s\n\n",yytext);
 }
       | variable ASSIGNOP logic_expression     
       ;

logic_expression : rel_expression   
         | rel_expression LOGICOP rel_expression    
         ;

rel_expression  : simple_expression {
    printf("rel_expression  -> simple_expression \n");
    fprintf(logout,"%d : rel_expression : simple_expression\n\n",line_count);
    fprintf(logout, "%s\n\n",yytext);
}
        | simple_expression RELOP simple_expression 
        ;

simple_expression : term {
    printf("simple_expression -> term\n");
    fprintf(logout,"%d : simple_expression : term \n\n",line_count);
    fprintf(logout, "%s\n\n",yytext);
} 
          | simple_expression ADDOP term {
            printf("simple_expression -> simple_expression ADDOP term\n");
            fprintf(logout,"simple_expression : simple_expression ADDOP term \n\n");
            fprintf(logout, "%s\n\n",yytext);
          }
          ;

term :  unary_expression {
                printf("term -> unary_expression\n");
                fprintf(logout,"%d : term : unary_expression\n\n",line_count);
                fprintf(logout, "%s\n\n",yytext);
            }
     |  term MULOP unary_expression
     ;

unary_expression : ADDOP unary_expression  
         | NOT unary_expression 
         | factor {
            printf("unary_expression -> factor\n");
            fprintf(logout,"%d : unary_expression : factor\n\n",line_count);
            fprintf(logout, "%s\n\n",yytext);
         }
         ;

factor  : variable {
    printf("factor -> variable\n");
    fprintf(logout,"%d : factor : variable\n\n",line_count);
    fprintf(logout, "%s\n\n",yytext);
}
    | ID LPAREN argument_list RPAREN
    | LPAREN expression RPAREN
    | CONST_INT 
    | CONST_FLOAT
    | variable INCOP 
    | variable DECOP
    ;

argument_list : arguments
              |
              ;

arguments : arguments COMMA logic_expression
          | logic_expression
          ;




%%

/*
 * Entry point: parses the file named by argv[1] and writes the reduction
 * log to "log.txt".
 *
 * Returns 0 once the parse has run. Exits with status 1 if no input path
 * was given or if either file cannot be opened.
 */
int main(int argc, char *argv[])
{
    FILE *fp;

    /* argv[1] is only meaningful when a path was actually supplied. */
    if (argc < 2)
    {
        fprintf(stderr, "usage: %s <input-file>\n", argc > 0 ? argv[0] : "parser");
        exit(1);
    }

    if ((fp = fopen(argv[1], "r")) == NULL)
    {
        /* logout is not open yet at this point, so report on stderr. */
        fprintf(stderr, "cannot open file\n");
        exit(1);
    }

    /* The grammar actions write to logout, so it must be open before yyparse(). */
    logout = fopen("log.txt", "w");
    if (logout == NULL)
    {
        fprintf(stderr, "cannot open log.txt\n");
        fclose(fp);
        exit(1);
    }

    yyin = fp;
    yyparse();

    fclose(fp);
    fclose(logout);
    return 0;

}

input.txt

int var(int a, int b){
return a+b;

}

我得到的输出:

type_specifier -> INT
type_specifier -> INT
parameter_list -> type_specifier ID
type_specifier -> INT
parameter_list -> parameter_list COMMA type_specifier ID
variable -> ID
factor -> variable
unary_expression -> factor
term -> unary_expression
simple_expression -> term
rel_expression  -> simple_expression 
expression -> logic_expression
syntax error, unexpected ID, expecting SEMICOLON

预期输出是:

type_specifier -> INT
type_specifier -> INT
parameter_list -> type_specifier ID
type_specifier -> INT
parameter_list -> parameter_list COMMA type_specifier ID
variable -> ID
factor -> variable
unary_expression -> factor
term -> unary_expression
simple_expression -> term

variable -> ID
factor -> variable
unary_expression -> factor
term -> unary_expression
simple_expression : simple_expression ADDOP term
rel_expression  -> simple_expression 
logic_expression : rel_expression
expression -> logic_expression
statement : RETURN expression SEMICOLON
statements : statement
compound_statement : LCURL statements RCURL
func_definition : type_specifier ID LPAREN parameter_list RPAREN compound_statement
unit : func_definition
program : program unit
start : program

添加 flex 文件以防万一

%option noyywrap

%{

#include<stdlib.h>
#include<stdio.h>
#include "y.tab.h"
#include "SymbolTable.h"
#include "SymbolInfo.h"
#include "ScopeTable.h"

void yyerror (char *);
extern YYSTYPE yylval;  
extern SymbolTable *table;
extern FILE *logout;
char *arr[100];
char *final_arr[100];

int k; //final_arr count
int i = 0; //arr count
int line_count = 1;

%}


id [a-z]*
DOUBLE (([0-9]+(\.[0-9]*)?)|([0-9]*\.[0-9]+)) 
newline \n

%%

{newline} {
        arr[i] = "\n",final_arr[k] = arr[i];
        i++; k++;
        line_count++;
    }

[ \t]+  {}
(([0-9]+(\.[0-9]*)?)|([0-9]*\.[0-9]+))  {
                        yylval.f = atof(yytext);
                        return DOUBLE;
                    }

"int" {
        memset(arr, 0, sizeof(arr)); i = 0; /* type keyword: clear arr and restart its index (fill value is an int, so 0, not NULL) */
        arr[i] = "int "; 
        final_arr[k] = "int "; /* final_arr is not cleared here; k keeps counting */
        i++; k++;
        return INT;
    }
"float" {
        memset(arr, 0, sizeof(arr)); i = 0; /* same reset as for "int" */
        arr[i] = "float "; final_arr[k] = "float ";
        i++; k++;
        return FLOAT;
    }
"void"  {
        memset(arr, 0, sizeof(arr)); i = 0; /* same reset as for "int" */
        arr[i] = "void "; final_arr[k] = "void ";
        i++; k++;
        return VOID;
    }   


";" {
        arr[i] = ";";final_arr[k] = ";";
        i++; k++;
        return SEMICOLON;}
"," {
        arr[i] = ","; final_arr[k] = ",";
        i++; k++;
        return COMMA;
    }
"(" {
        arr[i] = "(";final_arr[k] = "(";
        i++; k++;
        return LPAREN;}
")" {
        arr[i] = ")";final_arr[k] = ")";
        i++; k++;
        return RPAREN;}
"{" {return LCURL;}
"}" {return RCURL;}

{id}    {
        yylval.s = strdup(yytext);
        arr[i] = strdup(yytext); final_arr[k] = strdup(yytext);
        k++; i++;
        for(int j = 1; arr[j] != NULL; j++)
        {
            //fprintf(logout,"%s", arr[j]);
            //fprintf(logout,"arr [%d] %s\n ",j,arr[j]);
        }
        //fprintf(logout,"\n\n");
        return ID;

        }

%%                          

您似乎花了很多精力来尝试实现一种跟踪解析器中发生的事情的方法,但收效甚微,因为这里的问题只是缺少词法分析器关键字规则。

最好使用 flex 和 bison 的内置调试功能。这样您的语法和词法分析器会更简单、更易读,调试输出也会更完整(并且可以让您对照状态表来跟踪解析器的行为)。

这里有一个简短的总结。真的很简单。

  1. 将 --debug 添加到您的 bison 命令中。这会让 bison 生成用于跟踪解析过程的代码。(如果想少打几个字,可以使用 -t(表示 trace)——这是 POSIX 标准的命令行选项,应该同样适用于 yacc、byacc、btyacc 等。)

  2. 在 main 的开头添加以下三行(假设 main 位于您的 .y 文件中):

    #ifdef YYDEBUG
      yydebug = 1;
    #endif
    

    如果想做得更好,您可以让这条赋值语句由某个命令行标志来控制。

    完成后,您将收到以下跟踪输出:

    ... snip ... Pick up the trace at the ) at the end of the parameter list
    Reading a token: Next token is token RPAREN ()
    Shifting token RPAREN ()
    Entering state 28
    Reading a token: Next token is token LCURL ()
    Shifting token LCURL ()
    Entering state 25
    Reading a token: Next token is token ID ()
    Shifting token ID ()
    Entering state 44
    Reading a token: Next token is token ID ()
    ... snip ...
    

    注意,左大括号之后返回了两个 ID,它们分别对应 return 和 a 这两个记号。

  3. 您还可以使用 flex --debug(或 -d)在 flex 中启用跟踪。这会让扫描器为每个被接受的记号输出如下格式的行(此外还有其他一些行):

    --accepting rule at line 85 ("return")
    

    不幸的是,您需要对照源代码来核对行号,但在这个例子中,您可能已经注意到上面这一行与下面这一行的相似之处:

    --accepting rule at line 85 ("b")
    

  4. 为了进一步简化调试,值得养成一个习惯:以可以独立于解析器编译的方式编写扫描器。这样您就可以借助 -lfl 中提供的 main() 实现单独编译扫描器,从而对它进行测试。

参考资料和更多调试信息: