将令牌分配给结构节点类型变量后,令牌为空

after assigning token to struct node type variable the token is null

我正在解决这个奇怪的问题(至少对我来说) 我正在使用 lex 和 yacc 创建一个 AST 虽然所有的标记都有效并且语法也有效(我只用印刷品测试了它) 当我试图将一个新节点分配给一个变量时,只是为了测试我正在尝试打印节点的令牌,每次我尝试它时令牌只是(空) 例如,这是我的代码的一部分:

ifelse: IF LEFTBRACKET cond RIGHTBRACKET LEFTBLOCK body RIGHTBLOCK {$$ = mknode("IF",,);printf("token is %s\n",$$->token);} 

mknode 函数是这样工作的:

node* mknode(char* token, node* left, node* right){
    node* newnode = (node*)malloc(sizeof(node));
    char* newstr = (char*)malloc(sizeof(token)+1);
    strcpy(newstr, token);
    newnode->left = left;
    newnode->right = right;
    return newnode;
}

所以对于这个例子,这是输出:

token is (null)

知道为什么它保持为空吗? 如果重要的话,我正在使用 VMWare Ubuntu 20.04

完整的 lex 文件:

%{
#include "y.tab.h"
#include <stdio.h>
#include <string.h>
%}

%%
"\"" {printf("LEX: double quote here\n");return DQUOTE;}
"\'" {printf("LEX: single quote here\n");return SQUOTE;}
"}" {printf("LEX: } here\n");return RIGHTBLOCK;}
"{" {printf("LEX: { here\n");return LEFTBLOCK;}
";" {printf("LEX: ; here\n");return SEMICOLON;}
"," {printf("LEX: comma here\n");return COMMA;}
"(" {printf("LEX: opening bracket here\n");return LEFTBRACKET;}
")" {printf("LEX: closing bracket here\n");return RIGHTBRACKET;}


&& {printf("LEX: and here\n");return AND;}
"||" {printf("LEX: || here\n");return OR;}
"=" {printf("LEX: assign here\n");return ASSIGN;}
== {printf("LEX: == here\n");return EQ;}
">" {printf("LEX: > here\n");return GT;}
">=" {printf("LEX: >= here\n");return GTEQ;}
"<" {printf("LEX: < here\n");return LT;}
"<=" {printf("LEX: <= here\n");return LTEQ;}
"-" {printf("LEX: - here\n");return SUB;}
"!" {printf("LEX: ! here\n");return NOT;}
"!=" {printf("LEX: != here\n");return NOTEQ;}
"/" {printf("LEX: div here\n");return DIV;}
"+" {printf("LEX: add here\n");return ADD;}
"*" {printf("LEX: mul here\n");return MUL;}
"&" {printf("LEX: & here\n");return ADRS;}

if {printf("LEX: if here\n");return IF;}
else {printf("LEX: else here\n");return ELSE;}

do {printf("LEX: do here\n");return DO;}
while {printf("LEX: while here\n");return WHILE;}
for {printf("LEX: for here\n");return FOR;}

var {printf("LEX: var here\n");return VAR;}
return {printf("LEX: return here\n");return RETURN;}
null {printf("LEX: nullval here\n");return NULLVAL;}

void {printf("LEX: func return type here\n");return VOID;}
"int*" {printf("LEX: int* type here\n");return INTPOINT;}
"char*" {printf("LEX: char* type here\n");return CHARPOINT;}
"real*" {printf("LEX: real* type here\n");return REALPOINT;}
int {printf("LEX: int type here\n");return INT;}
real {printf("LEX: real type here\n");return REAL;}
char {printf("LEX: char type here\n");return CHAR;}
bool {printf("LEX: bool type here\n");return BOOL;}
"true"|"false" {printf("LEX: boolval here\n");return BOOLVAL;}
[0-9]+ {yylval.string = yytext;printf("LEX: int val here\n");return INTVAL;}
"-"|0|[1-9][0-9]+"."[0-9]+|[1-9][0-9]+'.'[0-9]['E'|'e']['+'|'-'][0-9]+ {printf("LEX: realval here\n");yylval.string = strdup(yytext);return REALVAL;}
[a-zA-Z][0-9]*"_"[a-zA-Z]* {printf("LEX: ID here\n");yylval.string = strdup(yytext);return ID;}
[a-zA-Z] {printf("LEX: char here\n");yylval.string = strdup(yytext);return CHARVAL;}
[a-zAZ]*[0-9]*[a-zAZ]+[0-9]*[a-zAZ]* {printf("LEX: string here\n");yylval.string = strdup(yytext);return STRING;}
. ;
%%

完整的yacc文件:

%{
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <string.h>

#include "lex.yy.c"
int yyerror();
typedef struct node
{
char* token;
struct node *left;
struct node *right;
}node;

node* mknode(char* token, node* left, node* right);
void printtree(node* tree);
%}

%union
{
    struct node *node;
    char* string;
}    

%token <string> DIV ADD MUL SUB AND NOT OR RETURN ASSIGN
%token <string> EQ GT GTEQ LT LTEQ NOTEQ SEMICOLON COMMA LEFTBRACKET RIGHTBRACKET RIGHTBLOCK LEFTBLOCK
%token <string> ID CHARVAL INTVAL REALVAL BOOLVAL STRING ADRS DQUOTE SQUOTE 
%token <node> INT REAL BOOL CHAR VOID INTPOINT CHARPOINT REALPOINT VAR NULLVAL 
%token <node> IF ELSE WHILE DO FOR 

%type <string> name oper type rettype ret
%type <node> code ifelse body action args argnum math cond params block valvar

%left SEMICOLON COMMA RIGHTBRACKET RIGHTBLOCK
%right LEFTBLOCK 
%%
st: code {printf("YACC: Code done!\n");}

code: rettype name params block code {
    $$ = mknode("(FUNC",mknode(,mknode("(ARGS",,NULL),mknode("(RET",mknode(,NULL,NULL),NULL)),mknode("(BODY",, NULL));
    printf("YACC: func ready\n");}| {}

params: LEFTBRACKET args RIGHTBRACKET {$$ = ;}
        

block: LEFTBLOCK code RIGHTBLOCK {$$ = ;}| 
       LEFTBLOCK body RIGHTBLOCK {$$ = ;}| 
       LEFTBLOCK code body RIGHTBLOCK {$$ = mknode("",,);}

ifelse: IF LEFTBRACKET cond RIGHTBRACKET LEFTBLOCK body RIGHTBLOCK {$$ = mknode("IF",,);printf("token is %s\n",$$->token);printf("YACC: if ready\n");}|
        IF LEFTBRACKET cond RIGHTBRACKET LEFTBLOCK body RIGHTBLOCK ELSE LEFTBLOCK body RIGHTBLOCK {$$ = mknode("IF",,mknode("",,mknode("ELSE",,NULL)));
            printf("YACC: if else ready\n");}

rettype:VOID {$$ = "VOID";}|
        type {$$ = ;}

name: STRING {$$ = ;}|
      CHARVAL {$$ = ;}

args: type argnum {$$ = mknode(,mknode(" ",NULL,NULL),);}| 
      {$$ = mknode("",NULL,NULL);printf("YACC: args ready\n");}

argnum: name argnum {$$ = mknode(,mknode(" ",NULL,NULL),);}| 
        COMMA argnum {$$ = mknode(" ",,NULL);}| 
        SEMICOLON args {$$ = mknode(" ",,NULL);}|  
        {$$ = NULL;printf("YACC: args num ready\n");}

type: INT {$$ = "INT";}|
      REAL {$$ = "REAL";}|
      CHAR {$$ = "CHAR";}|
      INTPOINT {$$ = "INT*";}|
      CHARPOINT {$$ = "CHAR*";}|
      REALPOINT {$$ = "REAL*";};

body: action body {$$ = mknode(" ",,);}|
       action {$$ = mknode(" ",,NULL);}| 
       ifelse  {$$ = mknode("(IF-ELSE",mknode("\n",NULL,NULL),);printf("YACC: block ready\n");}

action: name ASSIGN math {$$ = mknode(,mknode(,NULL,NULL),mknode(" ",,NULL));printf("YACC: action ready\n");}| 
        RETURN ret SEMICOLON {$$ = mknode("(RET",mknode(,NULL,NULL),NULL);printf("YACC: return action ready\n");}

ret: INTVAL {$$ = ;}| 
     SQUOTE CHARVAL SQUOTE {$$ = ;}| 
     REALVAL {$$ = ;}| 
     DQUOTE STRING DQUOTE  {$$ = ;}| 
     name {$$ = ;}| 
     ADRS name {$$ = ;}

math: valvar oper math {$$ = mknode(, ,);}| 
      valvar SEMICOLON {$$ = mknode(" ",,NULL);}| 
      valvar math {$$ = mknode(" ",,);}

oper: ADD {$$ = "+";}|
      DIV {$$ = "/";}|
      SUB {$$ = "-";}|
      MUL {$$ = "*";}

cond: valvar EQ valvar {$$ = mknode(,,);}| 
      valvar GT valvar {$$ = mknode(,,);}| 
      valvar GTEQ valvar {$$ = mknode(,,);}| 
      valvar LT valvar {$$ = mknode(,,);}| 
      valvar LTEQ valvar {$$ = mknode(,,);}| 
      valvar NOTEQ valvar {$$ = mknode(,,);}

valvar: name {$$ = mknode(,NULL,NULL);}|
        INTVAL {$$ = mknode(,NULL,NULL);}
%%

int main(){
    return yyparse();
}

void printtree(node* tree){
    printf("%s\n", tree->token);
    if(tree->left)
        printtree(tree->left);
    if(tree->right)
        printtree(tree->right);
}

node* mknode(char* token, node* left, node* right){
    node* newnode = (node*)malloc(sizeof(node));
    char* newstr = (char*)malloc(strlen(token)+1);
    strcpy(newstr, token);
    newnode->left = left;
    newnode->right = right;
    return newnode;
}


int yyerror(){
    printf("language error\n");
    return 0;
}

目前输入的测试码是:

void foo(int x){
    if (x==5){
        return 'a';
    }
}

在许多其他问题中,您的 mknode 函数从未设置 newnode->token。所以它是未定义的——你很幸运它包含一个 null 而不是一个会崩溃的无效指针。