为什么我生成的 Flex/Bison 解析器会给出语法错误,即使它被添加到规则中?

Why my generated Flex/Bison parser gives syntax error even if it's added in rules?

我正在尝试用 flex/bison 实现一个简单的解析器,用来检查如下这种类似 C 的程序:

main(){
x = 3;
print x;
}

但是当我向程序提供这个输入时,它无法与我在 flex 文件中提到的规则相匹配。下面是我对 flex 和 bison 文件的代码:

calc.l

%{
/*
 * Scanner for the calc grammar. Token codes and the yylval union come
 * from the bison-generated header calc.tab.h.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "calc.tab.h"
int lineno = 1;
%}

digit   [0-9]
id      [a-z][a-zA-Z0-9]*

%%
{digit}+    { yylval.num = atoi(yytext); return TOK_NUMBER; }
"main"      { return TOK_MAIN; }
"("         { return TOK_ORBRACKET; }
")"         { return TOK_CRBRACKET; }
"{"         { return TOK_OCBRACKET; }
"}"         { return TOK_CCBRACKET; }
"print"     { return TOK_PRINT; }
{id}        { /* Bounded copy: yylval.index is a fixed-size array. */
              snprintf(yylval.index, sizeof yylval.index, "%s", yytext);
              return TOK_VARIABLE; }
";"         { return TOK_SEMICOLON; }
"+"         { return TOK_ADD; }
"*"         { return TOK_MUL; }
"(-"{digit}+")" { /* Only the literal parts are quoted so {digit} is expanded.
                     Skip "(-" and store the magnitude; the grammar action
                     for TOK_NEGNUM applies the minus sign. */
                  yylval.num = atoi(yytext + 2);
                  return TOK_NEGNUM; }
"="         { return TOK_EQUAL; }
[ \t]+      { }
\n          { /* One increment per newline, not per run of newlines. */
              lineno++; }
.           { printf("Lexical error:'%c'\n", yytext[0]); }

%%

calc.y

%{
/* Prologue: C declarations copied into the generated parser source. */
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#include "calc.tab.h"

/* Current input line; defined and advanced by the scanner in calc.l. */
extern int lineno;
/* Bound used by set_variable/get_variable_value when scanning symboltable. */
int variable_counter = 0;

/* Scanner entry point, error callback and symbol-table helpers
   (the helpers are defined in the epilogue of this file). */
int yylex(void);
void yyerror(char *s);
void set_variable(char* var_name, int var_value);
int get_variable_value(char* var_name);
extern FILE* yyin;
%}

/* Semantic value of a symbol: an identifier name (index) or an integer (num). */
%union {
    char index[100];
    int num;
}

%token TOK_NUMBER TOK_MUL TOK_ADD TOK_NEGNUM TOK_EQUAL TOK_MAIN TOK_ORBRACKET TOK_CRBRACKET TOK_OCBRACKET TOK_CCBRACKET TOK_SEMICOLON TOK_PRINT TOK_VARIABLE

/* "%code requires" is also emitted into the generated header, so
   struct symtable is declared wherever calc.tab.h is included. */
%code requires {
    struct symtable
    {
        char var_name[100];
        int var_value;
    };
}

/* File-scope storage for variables; zero-initialised because it is static.
   NOTE(review): pos is not referenced anywhere in the code shown here. */
%code {
    struct symtable symboltable[100];
    int pos = 0;
}

/* expr and the numeric tokens carry .num; TOK_VARIABLE carries .index. */
%type <num> expr TOK_NUMBER TOK_NEGNUM
%type <index> TOK_VARIABLE

/* Both operators are left-associative; TOK_MUL is declared last and
   therefore has higher precedence than TOK_ADD. */
%left TOK_ADD
%left TOK_MUL

%%

/* A program is: main ( ) { stmts } */
prog:
    TOK_MAIN TOK_ORBRACKET TOK_CRBRACKET TOK_OCBRACKET stmts TOK_CCBRACKET
;

/* Empty, or a statement followed by TOK_SEMICOLON and further statements. */
stmts:
    | stmt TOK_SEMICOLON stmts
;

/* print writes the value of its expression ($2) followed by a newline. */
stmt:
    expr TOK_SEMICOLON
    | TOK_PRINT expr TOK_SEMICOLON      {   fprintf(stdout, "%d\n", $2);    }
    | assignment
;

/* Integer expressions; $$ is the computed value of the expression. */
expr:
    TOK_NUMBER                                  { $$ = $1; }
    | TOK_VARIABLE                              { $$ = get_variable_value($1); }
    | expr TOK_MUL expr                         { $$ = $1 * $3; }
    | expr TOK_ADD expr                         { $$ = $1 + $3; }
    | TOK_NEGNUM                                { $$ = -$1; }
    | TOK_ORBRACKET expr TOK_CRBRACKET          { $$ = $2; }
;

/* Stores the value of the expression ($3) under the variable name ($1). */
assignment:
    TOK_VARIABLE TOK_EQUAL expr { set_variable($1, $3); }
;

%%

/*
 * Return the index of var_name among the first variable_counter entries
 * of symboltable, or -1 when no entry has that name.
 */
static int find_variable(const char *var_name) {
    for (int i = 0; i < variable_counter; i++) {
        if (strcmp(var_name, symboltable[i].var_name) == 0) {
            return i;
        }
    }
    return -1;
}

/*
 * Bind var_value to var_name in symboltable.
 *
 * An existing variable has its value overwritten; a new one is appended
 * at index variable_counter. When the table is full the assignment is
 * dropped and a message is written to stderr.
 */
void set_variable(char* var_name, int var_value) {
    const int capacity = (int)(sizeof symboltable / sizeof symboltable[0]);
    int slot = find_variable(var_name);

    if (slot < 0) {
        if (variable_counter >= capacity) {
            fprintf(stderr, "Symbol table full: cannot define '%s'\n", var_name);
            return;
        }
        slot = variable_counter++;
        /* Bounded copy so an over-long name can never overrun var_name. */
        snprintf(symboltable[slot].var_name,
                 sizeof symboltable[slot].var_name, "%s", var_name);
    }

    symboltable[slot].var_value = var_value;
}

/*
 * Return the value bound to var_name.
 *
 * An unknown name is reported on stderr and evaluates to 0, so the
 * function always returns a defined value.
 */
int get_variable_value(char* var_name) {
    int slot = find_variable(var_name);

    if (slot < 0) {
        fprintf(stderr, "Undefined variable '%s', using 0\n", var_name);
        return 0;
    }
    return symboltable[slot].var_value;
}

/*
 * Error callback invoked by the generated parser: writes the message s
 * to stderr together with the scanner's current line number.
 */
void yyerror(char *s)
{
    const int current_line = lineno;

    fprintf(stderr, "Parsing error: line %d and %s\n", current_line, s);
}

/*
 * Program entry point: parses the file named by argv[1].
 *
 * Returns 0 when the input parses successfully, 1 when no file name is
 * given or parsing fails, and -1 when the file cannot be opened.
 * Diagnostics are written to stderr.
 */
int main(int argc,char* argv[])
{
    /* argc < 2 also covers argc == 0, where argv[1] must not be read. */
    if (argc < 2) {
        fprintf(stderr, "\nPlease provide an input file name. Exiting...\n");
        return 1;
    }

    yyin = fopen(argv[1], "r");
    if (!yyin) {
        fprintf(stderr, "ERROR: Couldn't open file %s\n", argv[1]);
        return -1;
    }

    /* yyparse() returns 0 on success and non-zero on failure. */
    int status = yyparse();

    fclose(yyin);
    return status == 0 ? 0 : 1;
}

它显示输出为:

'exical error:'
'exical error:'
3
'exical error:'
Parsing error: line 4 and syntax error

在此先感谢您的帮助。

两个问题:

消息 'exical error:' 来自被解析文件中的 DOS 风格 \r\n 行结尾:词法分析器把 \r 当作非法字符原样打印出来,终端遇到 \r 会回到行首,随后的 ' 覆盖了开头的 L。您应该把行结尾转换为 \n,或者在词法分析器中忽略 \r 字符,例如把规则

[ \t]+      { }

扩展为

[ \t\r]+      { }

消息 Parsing error: line 4 and syntax error 来自您的解析器中的逻辑缺陷。这里

stmts:
    | stmt TOK_SEMICOLON stmts
;

您指定,每个 stmt 都以分号结尾。但是这里

stmt:
    expr TOK_SEMICOLON
    | TOK_PRINT expr TOK_SEMICOLON      {   fprintf(stdout, "%d\n", $2);    }
    | assignment
;

又要求在 expr 和 TOK_PRINT expr 之后各有一个分号。所以在这一行的末尾就需要两个分号:

print x;;

但您可能想像这样删除多余的 TOK_SEMICOLON

stmt:
    expr
    | TOK_PRINT expr      {   fprintf(stdout, "%d\n", $2);    }
    | assignment
;

那么您的文件应该会按照您的预期进行解析。