使用 Bison 解析简单 C 源代码的问题

Problems with parsing simple C source code using Bison

这是我正在使用的解析器代码

%{
#include <cstdio>
#include <iostream>
#include <cstring>
#include <stdio.h>
#include "c.ast.hpp"
#include <typeinfo>
#define YYDEBUG 1

using namespace std;

// stuff from flex that bison needs to know about:
extern "C" int yylex();
int yyparse(BlockOfFunctions *ast);
extern "C" FILE *yyin;
 
void yyerror(BlockOfFunctions *ast, const char *s);

#define TRACE printf("reduce at line %d\n", __LINE__);


%}
%token  IDENTIFIER I_CONSTANT F_CONSTANT STRING_LITERAL FUNC_NAME SIZEOF
%token  PTR_OP INC_OP DEC_OP LEFT_OP RIGHT_OP LE_OP GE_OP EQ_OP NE_OP
%token  AND_OP OR_OP MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN ADD_ASSIGN
%token  SUB_ASSIGN LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN
%token  XOR_ASSIGN OR_ASSIGN
%token  TYPEDEF_NAME ENUMERATION_CONSTANT

%token  TYPEDEF EXTERN STATIC AUTO REGISTER INLINE
%token  CONST RESTRICT VOLATILE
%token  BOOL CHAR SHORT INT LONG SIGNED UNSIGNED FLOAT DOUBLE VOID
%token  COMPLEX IMAGINARY 
%token  STRUCT UNION ENUM ELLIPSIS

%token  CASE DEFAULT IF ELSE SWITCH WHILE DO FOR GOTO CONTINUE BREAK RETURN

%token  ALIGNAS ALIGNOF ATOMIC GENERIC NORETURN STATIC_ASSERT THREAD_LOCAL

%start translation_unit
%parse-param {BlockOfFunctions *ast}

%union {
    string *str;                  /* value carried by IDENTIFIER */
    TypeSpecifier typespec;       /* type_specifier, declaration_specifiers */
    FunctionDefinition *func;     /* external_declaration, function_definition */
    BlockOfFunctions *blockfunc;  /* translation_unit */
    Declaration *decl;            /* parameter_declaration */
    vector<Declaration> *decls;   /* parameter_list, parameter_type_list */
    Signature *sig;               /* declarator, direct_declarator */
}

%type<typespec> type_specifier declaration_specifiers
%type<str> IDENTIFIER
%type<func> external_declaration function_definition
%type<blockfunc> translation_unit
%type<decl> parameter_declaration
%type<decls> parameter_list parameter_type_list
%type<sig> declarator direct_declarator
%%

declaration_specifiers
    : type_specifier { TRACE $$ = $1; /* forward the TypeSpecifier value unchanged */ }
    ;

type_specifier
    : VOID {
        /* `void` keyword: log, then yield the matching enumerator. */
        std::cout << "creating void" << std::endl;
        $$ = TypeSpecifier::Void;
    }
    | INT {
        /* `int` keyword: log, then yield the matching enumerator. */
        std::cout << "creating int" << std::endl;
        $$ = TypeSpecifier::Int;
    }
    ;

declarator
    : direct_declarator { $$ = $1; /* pass the Signature pointer through */ }
    ;

/* direct_declarator: produces the Signature* for a declared name.
 *   IDENTIFIER alternative: $1 is the token's string* value.
 *   Other alternatives:     $1 is the inner declarator's Signature*,
 *                           $3 is the vector<Declaration>* of parameters. */
direct_declarator
    : IDENTIFIER {
        Signature sig;
        string name = *$1;
        sig.name = name;
        // NOTE: `sig` is local to this action, so $$ receives the address of
        // an object whose lifetime ends with the action. This is the
        // dangling-pointer problem the question is about.
        $$ = &sig;
        cout << "creating identifier " << sig.name << endl;
    }
    | direct_declarator '(' parameter_type_list ')' {
        cout << "with argument" << endl;

        cout << "got declarator " << *$1 << endl;
        cout << "creating declaration " << $3->at(0) << endl;
        // The parameter list is only printed; the declarator's value is forwarded.
        $$ = $1;
    }
    | direct_declarator '(' ')' {
        $$ = $1;
        cout << "argument less function" << endl; 
    }
    ;

/* parameter_type_list: forwards the vector<Declaration>* built by parameter_list. */
parameter_type_list
    : parameter_list {
        $$ = $1;
        cout << "creating parameter type list " << $$->at(0) << endl; 
    }
    ;

/* parameter_list: wraps a single parameter_declaration ($1, a Declaration*)
 * in a vector<Declaration>. */
parameter_list
    : parameter_declaration {
        vector<Declaration> params;
        cout << "pushing back " << *$1 << endl;
        params.push_back(*$1);
        // NOTE: `params` is local to this action; $$ ends up holding the
        // address of an object that is destroyed when the action ends.
        $$ = &params;
        cout << "creating parameter declaration " << $$->at(0) << endl;
    }
    ;

/* parameter_declaration: combines the type ($1, a TypeSpecifier) with the
 * name taken from the declarator's Signature ($2) into a Declaration. */
parameter_declaration
    : declaration_specifiers declarator {
        cout << "creating param declaration" << endl;
        Declaration decl;
        string name = $2->name;
        decl.type = $1;
        decl.name = name;
        // NOTE: `decl` is local to this action; storing its address in $$
        // is the statement the answer singles out as undefined behaviour.
        $$ = &decl;
    }
    ;

/* translation_unit: copies each parsed FunctionDefinition into the block
 * of the `ast` object supplied through %parse-param. */
translation_unit
    : external_declaration { ast->block.push_back(*$1); }
    | translation_unit external_declaration { ast->block.push_back(*$2); }
    ;

external_declaration
    : function_definition  { TRACE $$ = $1; /* forward the FunctionDefinition pointer */ }
    ;

/* function_definition: an empty-bodied function. The return type comes from
 * $1 (a TypeSpecifier) and the name from the declarator's Signature ($2). */
function_definition
    : declaration_specifiers declarator '{' '}' {
        string name = $2->name;
        FunctionDefinition fn;
        fn.ret = $1;
        fn.name = name;
        // NOTE: `fn` is local to this action; $$ receives the address of an
        // object that no longer exists once the action returns.
        $$ = &fn;
    }
    ;

%%
#include <stdio.h>

// Error callback invoked by the generated parser.
// `ast` is the %parse-param object and is not used here; `s` is the message.
void yyerror(BlockOfFunctions *ast, const char *s)
{
    // Flush stdout first so buffered trace output precedes the diagnostic.
    std::fflush(stdout);
    std::fprintf(stderr, "*** %s\n", s);
}

然后我尝试用这个解析器来解析下面的源代码:
void empty(int a) { }

但我得到以下输出

bison -t -v -o c.tab.cpp -d c.y
flex -o c.lex.cpp -l c.l
g++ c.tab.cpp c.lex.cpp cc.cpp -lm -ll -lfl -o cc
./cc examples/test.c
creating void
reduce at line 63
creating identifier empty
creating int
reduce at line 63
creating identifier a
creating param declaration
pushing back declaration: int a

creating parameter declaration declaration: int a

creating parameter type list declaration: void 

with argument
got declarator signature: a

creating declaration declaration: void 

reduce at line 129
retv = 0
function: void a

它错误地将函数名解析成了 a,而实际上应该是 empty。我已经把错误范围缩小到一个具体位置:parameter_list 非终结符被正确解析,但当它的值传递到 parameter_type_list 时,却变成了一个完全不同的对象。这一点可以从运行时打印的信息中看出来。

显然我做错了什么,但我想不通。任何帮助将不胜感激。

这条语句(以及其他类似的语句)毫无疑问是未定义行为:

$$ = &decl;

您正试图保存一个指向局部变量的指针,而该局部变量的生命周期马上就要结束。等到这个悬空指针的值最终被使用时,它已不再指向任何有效的对象。

我强烈建议您在 g++ 的编译选项中加上 -Wall。我不确定 gcc 能否检测到这个错误(尤其是在没有开启优化选项的情况下),但没有理由不给它一个向您发出警告的机会。

由于没有看到您的 flex 代码,我无法判断您是否也把悬空指针当作记号(token)的语义值传递了出去,这是语义值莫名其妙发生变化的另一个常见原因。您可能也需要检查一下这一点。