将令牌分配给结构节点类型变量后,令牌为空
after assigning token to struct node type variable the token is null
我正在解决这个奇怪的问题(至少对我来说)
我正在使用 lex 和 yacc 创建一个 AST
虽然所有的标记都有效并且语法也有效(我只用印刷品测试了它)
当我试图将一个新节点分配给一个变量时,只是为了测试我正在尝试打印节点的令牌,每次我尝试它时令牌只是(空)
例如,这是我的代码的一部分:
ifelse: IF LEFTBRACKET cond RIGHTBRACKET LEFTBLOCK body RIGHTBLOCK {$$ = mknode("IF",,);printf("token is %s\n",$$->token);}
mknode 函数是这样工作的:
node* mknode(char* token, node* left, node* right){
node* newnode = (node*)malloc(sizeof(node));
char* newstr = (char*)malloc(sizeof(token)+1);
strcpy(newstr, token);
newnode->left = left;
newnode->right = right;
return newnode;
}
所以对于这个例子,这是输出:
token is (null)
知道为什么它保持为空吗?
如果重要的话,我正在使用 VMWare Ubuntu 20.04
完整的 lex 文件:
%{
#include "y.tab.h"
#include <stdio.h>
#include <string.h>
%}
%%
"\"" {printf("LEX: double quote here\n");return DQUOTE;}
"\'" {printf("LEX: single quote here\n");return SQUOTE;}
"}" {printf("LEX: } here\n");return RIGHTBLOCK;}
"{" {printf("LEX: { here\n");return LEFTBLOCK;}
";" {printf("LEX: ; here\n");return SEMICOLON;}
"," {printf("LEX: comma here\n");return COMMA;}
"(" {printf("LEX: opening bracket here\n");return LEFTBRACKET;}
")" {printf("LEX: closing bracket here\n");return RIGHTBRACKET;}
&& {printf("LEX: and here\n");return AND;}
"||" {printf("LEX: || here\n");return OR;}
"=" {printf("LEX: assign here\n");return ASSIGN;}
== {printf("LEX: == here\n");return EQ;}
">" {printf("LEX: > here\n");return GT;}
">=" {printf("LEX: >= here\n");return GTEQ;}
"<" {printf("LEX: < here\n");return LT;}
"<=" {printf("LEX: <= here\n");return LTEQ;}
"-" {printf("LEX: - here\n");return SUB;}
"!" {printf("LEX: ! here\n");return NOT;}
"!=" {printf("LEX: != here\n");return NOTEQ;}
"/" {printf("LEX: div here\n");return DIV;}
"+" {printf("LEX: add here\n");return ADD;}
"*" {printf("LEX: mul here\n");return MUL;}
"&" {printf("LEX: & here\n");return ADRS;}
if {printf("LEX: if here\n");return IF;}
else {printf("LEX: else here\n");return ELSE;}
do {printf("LEX: do here\n");return DO;}
while {printf("LEX: while here\n");return WHILE;}
for {printf("LEX: for here\n");return FOR;}
var {printf("LEX: var here\n");return VAR;}
return {printf("LEX: return here\n");return RETURN;}
null {printf("LEX: nullval here\n");return NULLVAL;}
void {printf("LEX: func return type here\n");return VOID;}
"int*" {printf("LEX: int* type here\n");return INTPOINT;}
"char*" {printf("LEX: char* type here\n");return CHARPOINT;}
"real*" {printf("LEX: real* type here\n");return REALPOINT;}
int {printf("LEX: int type here\n");return INT;}
real {printf("LEX: real type here\n");return REAL;}
char {printf("LEX: char type here\n");return CHAR;}
bool {printf("LEX: bool type here\n");return BOOL;}
"true"|"false" {printf("LEX: boolval here\n");return BOOLVAL;}
[0-9]+ {yylval.string = yytext;printf("LEX: int val here\n");return INTVAL;}
"-"|0|[1-9][0-9]+"."[0-9]+|[1-9][0-9]+'.'[0-9]['E'|'e']['+'|'-'][0-9]+ {printf("LEX: realval here\n");yylval.string = strdup(yytext);return REALVAL;}
[a-zA-Z][0-9]*"_"[a-zA-Z]* {printf("LEX: ID here\n");yylval.string = strdup(yytext);return ID;}
[a-zA-Z] {printf("LEX: char here\n");yylval.string = strdup(yytext);return CHARVAL;}
[a-zAZ]*[0-9]*[a-zAZ]+[0-9]*[a-zAZ]* {printf("LEX: string here\n");yylval.string = strdup(yytext);return STRING;}
. ;
%%
完整的yacc文件:
%{
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <string.h>
#include "lex.yy.c"
int yyerror();
typedef struct node
{
char* token;
struct node *left;
struct node *right;
}node;
node* mknode(char* token, node* left, node* right);
void printtree(node* tree);
%}
%union
{
struct node *node;
char* string;
}
%token <string> DIV ADD MUL SUB AND NOT OR RETURN ASSIGN
%token <string> EQ GT GTEQ LT LTEQ NOTEQ SEMICOLON COMMA LEFTBRACKET RIGHTBRACKET RIGHTBLOCK LEFTBLOCK
%token <string> ID CHARVAL INTVAL REALVAL BOOLVAL STRING ADRS DQUOTE SQUOTE
%token <node> INT REAL BOOL CHAR VOID INTPOINT CHARPOINT REALPOINT VAR NULLVAL
%token <node> IF ELSE WHILE DO FOR
%type <string> name oper type rettype ret
%type <node> code ifelse body action args argnum math cond params block valvar
%left SEMICOLON COMMA RIGHTBRACKET RIGHTBLOCK
%right LEFTBLOCK
%%
st: code {printf("YACC: Code done!\n");}
code: rettype name params block code {
$$ = mknode("(FUNC",mknode(,mknode("(ARGS",,NULL),mknode("(RET",mknode(,NULL,NULL),NULL)),mknode("(BODY",, NULL));
printf("YACC: func ready\n");}| {}
params: LEFTBRACKET args RIGHTBRACKET {$$ = ;}
block: LEFTBLOCK code RIGHTBLOCK {$$ = ;}|
LEFTBLOCK body RIGHTBLOCK {$$ = ;}|
LEFTBLOCK code body RIGHTBLOCK {$$ = mknode("",,);}
ifelse: IF LEFTBRACKET cond RIGHTBRACKET LEFTBLOCK body RIGHTBLOCK {$$ = mknode("IF",,);printf("token is %s\n",$$->token);printf("YACC: if ready\n");}|
IF LEFTBRACKET cond RIGHTBRACKET LEFTBLOCK body RIGHTBLOCK ELSE LEFTBLOCK body RIGHTBLOCK {$$ = mknode("IF",,mknode("",,mknode("ELSE",,NULL)));
printf("YACC: if else ready\n");}
rettype:VOID {$$ = "VOID";}|
type {$$ = ;}
name: STRING {$$ = ;}|
CHARVAL {$$ = ;}
args: type argnum {$$ = mknode(,mknode(" ",NULL,NULL),);}|
{$$ = mknode("",NULL,NULL);printf("YACC: args ready\n");}
argnum: name argnum {$$ = mknode(,mknode(" ",NULL,NULL),);}|
COMMA argnum {$$ = mknode(" ",,NULL);}|
SEMICOLON args {$$ = mknode(" ",,NULL);}|
{$$ = NULL;printf("YACC: args num ready\n");}
type: INT {$$ = "INT";}|
REAL {$$ = "REAL";}|
CHAR {$$ = "CHAR";}|
INTPOINT {$$ = "INT*";}|
CHARPOINT {$$ = "CHAR*";}|
REALPOINT {$$ = "REAL*";};
body: action body {$$ = mknode(" ",,);}|
action {$$ = mknode(" ",,NULL);}|
ifelse {$$ = mknode("(IF-ELSE",mknode("\n",NULL,NULL),);printf("YACC: block ready\n");}
action: name ASSIGN math {$$ = mknode(,mknode(,NULL,NULL),mknode(" ",,NULL));printf("YACC: action ready\n");}|
RETURN ret SEMICOLON {$$ = mknode("(RET",mknode(,NULL,NULL),NULL);printf("YACC: return action ready\n");}
ret: INTVAL {$$ = ;}|
SQUOTE CHARVAL SQUOTE {$$ = ;}|
REALVAL {$$ = ;}|
DQUOTE STRING DQUOTE {$$ = ;}|
name {$$ = ;}|
ADRS name {$$ = ;}
math: valvar oper math {$$ = mknode(, ,);}|
valvar SEMICOLON {$$ = mknode(" ",,NULL);}|
valvar math {$$ = mknode(" ",,);}
oper: ADD {$$ = "+";}|
DIV {$$ = "/";}|
SUB {$$ = "-";}|
MUL {$$ = "*";}
cond: valvar EQ valvar {$$ = mknode(,,);}|
valvar GT valvar {$$ = mknode(,,);}|
valvar GTEQ valvar {$$ = mknode(,,);}|
valvar LT valvar {$$ = mknode(,,);}|
valvar LTEQ valvar {$$ = mknode(,,);}|
valvar NOTEQ valvar {$$ = mknode(,,);}
valvar: name {$$ = mknode(,NULL,NULL);}|
INTVAL {$$ = mknode(,NULL,NULL);}
%%
int main(){
return yyparse();
}
void printtree(node* tree){
printf("%s\n", tree->token);
if(tree->left)
printtree(tree->left);
if(tree->right)
printtree(tree->right);
}
node* mknode(char* token, node* left, node* right){
node* newnode = (node*)malloc(sizeof(node));
char* newstr = (char*)malloc(strlen(token)+1);
strcpy(newstr, token);
newnode->left = left;
newnode->right = right;
return newnode;
}
int yyerror(){
printf("language error\n");
return 0;
}
目前输入的测试码是:
void foo(int x){
if (x==5){
return 'a';
}
}
在许多其他问题中,您的 mknode
函数从未设置 newnode->token
。所以它是未定义的——你很幸运它包含一个 null 而不是一个会崩溃的无效指针。
我正在解决这个奇怪的问题(至少对我来说) 我正在使用 lex 和 yacc 创建一个 AST 虽然所有的标记都有效并且语法也有效(我只用印刷品测试了它) 当我试图将一个新节点分配给一个变量时,只是为了测试我正在尝试打印节点的令牌,每次我尝试它时令牌只是(空) 例如,这是我的代码的一部分:
ifelse: IF LEFTBRACKET cond RIGHTBRACKET LEFTBLOCK body RIGHTBLOCK {$$ = mknode("IF",,);printf("token is %s\n",$$->token);}
mknode 函数是这样工作的:
node* mknode(char* token, node* left, node* right){
node* newnode = (node*)malloc(sizeof(node));
char* newstr = (char*)malloc(sizeof(token)+1);
strcpy(newstr, token);
newnode->left = left;
newnode->right = right;
return newnode;
}
所以对于这个例子,这是输出:
token is (null)
知道为什么它保持为空吗? 如果重要的话,我正在使用 VMWare Ubuntu 20.04
完整的 lex 文件:
%{
#include "y.tab.h"
#include <stdio.h>
#include <string.h>
%}
%%
"\"" {printf("LEX: double quote here\n");return DQUOTE;}
"\'" {printf("LEX: single quote here\n");return SQUOTE;}
"}" {printf("LEX: } here\n");return RIGHTBLOCK;}
"{" {printf("LEX: { here\n");return LEFTBLOCK;}
";" {printf("LEX: ; here\n");return SEMICOLON;}
"," {printf("LEX: comma here\n");return COMMA;}
"(" {printf("LEX: opening bracket here\n");return LEFTBRACKET;}
")" {printf("LEX: closing bracket here\n");return RIGHTBRACKET;}
&& {printf("LEX: and here\n");return AND;}
"||" {printf("LEX: || here\n");return OR;}
"=" {printf("LEX: assign here\n");return ASSIGN;}
== {printf("LEX: == here\n");return EQ;}
">" {printf("LEX: > here\n");return GT;}
">=" {printf("LEX: >= here\n");return GTEQ;}
"<" {printf("LEX: < here\n");return LT;}
"<=" {printf("LEX: <= here\n");return LTEQ;}
"-" {printf("LEX: - here\n");return SUB;}
"!" {printf("LEX: ! here\n");return NOT;}
"!=" {printf("LEX: != here\n");return NOTEQ;}
"/" {printf("LEX: div here\n");return DIV;}
"+" {printf("LEX: add here\n");return ADD;}
"*" {printf("LEX: mul here\n");return MUL;}
"&" {printf("LEX: & here\n");return ADRS;}
if {printf("LEX: if here\n");return IF;}
else {printf("LEX: else here\n");return ELSE;}
do {printf("LEX: do here\n");return DO;}
while {printf("LEX: while here\n");return WHILE;}
for {printf("LEX: for here\n");return FOR;}
var {printf("LEX: var here\n");return VAR;}
return {printf("LEX: return here\n");return RETURN;}
null {printf("LEX: nullval here\n");return NULLVAL;}
void {printf("LEX: func return type here\n");return VOID;}
"int*" {printf("LEX: int* type here\n");return INTPOINT;}
"char*" {printf("LEX: char* type here\n");return CHARPOINT;}
"real*" {printf("LEX: real* type here\n");return REALPOINT;}
int {printf("LEX: int type here\n");return INT;}
real {printf("LEX: real type here\n");return REAL;}
char {printf("LEX: char type here\n");return CHAR;}
bool {printf("LEX: bool type here\n");return BOOL;}
"true"|"false" {printf("LEX: boolval here\n");return BOOLVAL;}
[0-9]+ {yylval.string = yytext;printf("LEX: int val here\n");return INTVAL;}
"-"|0|[1-9][0-9]+"."[0-9]+|[1-9][0-9]+'.'[0-9]['E'|'e']['+'|'-'][0-9]+ {printf("LEX: realval here\n");yylval.string = strdup(yytext);return REALVAL;}
[a-zA-Z][0-9]*"_"[a-zA-Z]* {printf("LEX: ID here\n");yylval.string = strdup(yytext);return ID;}
[a-zA-Z] {printf("LEX: char here\n");yylval.string = strdup(yytext);return CHARVAL;}
[a-zAZ]*[0-9]*[a-zAZ]+[0-9]*[a-zAZ]* {printf("LEX: string here\n");yylval.string = strdup(yytext);return STRING;}
. ;
%%
完整的yacc文件:
%{
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <string.h>
#include "lex.yy.c"
int yyerror();
typedef struct node
{
char* token;
struct node *left;
struct node *right;
}node;
node* mknode(char* token, node* left, node* right);
void printtree(node* tree);
%}
%union
{
struct node *node;
char* string;
}
%token <string> DIV ADD MUL SUB AND NOT OR RETURN ASSIGN
%token <string> EQ GT GTEQ LT LTEQ NOTEQ SEMICOLON COMMA LEFTBRACKET RIGHTBRACKET RIGHTBLOCK LEFTBLOCK
%token <string> ID CHARVAL INTVAL REALVAL BOOLVAL STRING ADRS DQUOTE SQUOTE
%token <node> INT REAL BOOL CHAR VOID INTPOINT CHARPOINT REALPOINT VAR NULLVAL
%token <node> IF ELSE WHILE DO FOR
%type <string> name oper type rettype ret
%type <node> code ifelse body action args argnum math cond params block valvar
%left SEMICOLON COMMA RIGHTBRACKET RIGHTBLOCK
%right LEFTBLOCK
%%
st: code {printf("YACC: Code done!\n");}
code: rettype name params block code {
$$ = mknode("(FUNC",mknode(,mknode("(ARGS",,NULL),mknode("(RET",mknode(,NULL,NULL),NULL)),mknode("(BODY",, NULL));
printf("YACC: func ready\n");}| {}
params: LEFTBRACKET args RIGHTBRACKET {$$ = ;}
block: LEFTBLOCK code RIGHTBLOCK {$$ = ;}|
LEFTBLOCK body RIGHTBLOCK {$$ = ;}|
LEFTBLOCK code body RIGHTBLOCK {$$ = mknode("",,);}
ifelse: IF LEFTBRACKET cond RIGHTBRACKET LEFTBLOCK body RIGHTBLOCK {$$ = mknode("IF",,);printf("token is %s\n",$$->token);printf("YACC: if ready\n");}|
IF LEFTBRACKET cond RIGHTBRACKET LEFTBLOCK body RIGHTBLOCK ELSE LEFTBLOCK body RIGHTBLOCK {$$ = mknode("IF",,mknode("",,mknode("ELSE",,NULL)));
printf("YACC: if else ready\n");}
rettype:VOID {$$ = "VOID";}|
type {$$ = ;}
name: STRING {$$ = ;}|
CHARVAL {$$ = ;}
args: type argnum {$$ = mknode(,mknode(" ",NULL,NULL),);}|
{$$ = mknode("",NULL,NULL);printf("YACC: args ready\n");}
argnum: name argnum {$$ = mknode(,mknode(" ",NULL,NULL),);}|
COMMA argnum {$$ = mknode(" ",,NULL);}|
SEMICOLON args {$$ = mknode(" ",,NULL);}|
{$$ = NULL;printf("YACC: args num ready\n");}
type: INT {$$ = "INT";}|
REAL {$$ = "REAL";}|
CHAR {$$ = "CHAR";}|
INTPOINT {$$ = "INT*";}|
CHARPOINT {$$ = "CHAR*";}|
REALPOINT {$$ = "REAL*";};
body: action body {$$ = mknode(" ",,);}|
action {$$ = mknode(" ",,NULL);}|
ifelse {$$ = mknode("(IF-ELSE",mknode("\n",NULL,NULL),);printf("YACC: block ready\n");}
action: name ASSIGN math {$$ = mknode(,mknode(,NULL,NULL),mknode(" ",,NULL));printf("YACC: action ready\n");}|
RETURN ret SEMICOLON {$$ = mknode("(RET",mknode(,NULL,NULL),NULL);printf("YACC: return action ready\n");}
ret: INTVAL {$$ = ;}|
SQUOTE CHARVAL SQUOTE {$$ = ;}|
REALVAL {$$ = ;}|
DQUOTE STRING DQUOTE {$$ = ;}|
name {$$ = ;}|
ADRS name {$$ = ;}
math: valvar oper math {$$ = mknode(, ,);}|
valvar SEMICOLON {$$ = mknode(" ",,NULL);}|
valvar math {$$ = mknode(" ",,);}
oper: ADD {$$ = "+";}|
DIV {$$ = "/";}|
SUB {$$ = "-";}|
MUL {$$ = "*";}
cond: valvar EQ valvar {$$ = mknode(,,);}|
valvar GT valvar {$$ = mknode(,,);}|
valvar GTEQ valvar {$$ = mknode(,,);}|
valvar LT valvar {$$ = mknode(,,);}|
valvar LTEQ valvar {$$ = mknode(,,);}|
valvar NOTEQ valvar {$$ = mknode(,,);}
valvar: name {$$ = mknode(,NULL,NULL);}|
INTVAL {$$ = mknode(,NULL,NULL);}
%%
int main(){
return yyparse();
}
void printtree(node* tree){
printf("%s\n", tree->token);
if(tree->left)
printtree(tree->left);
if(tree->right)
printtree(tree->right);
}
node* mknode(char* token, node* left, node* right){
node* newnode = (node*)malloc(sizeof(node));
char* newstr = (char*)malloc(strlen(token)+1);
strcpy(newstr, token);
newnode->left = left;
newnode->right = right;
return newnode;
}
int yyerror(){
printf("language error\n");
return 0;
}
目前输入的测试码是:
void foo(int x){
if (x==5){
return 'a';
}
}
在许多其他问题中,您的 mknode
函数从未设置 newnode->token
。所以它是未定义的——你很幸运它包含一个 null 而不是一个会崩溃的无效指针。