bison 中的解析在第一行停止
Parsing in bison stops at first line
我正在用类似 python 的语言创建一个 bison 语法,当 运行 设置我的测试代码文件时我得到的输出是这样的:
found identifier a at line 2
memory exhausted
Parsing completed successfully
我遇到了一些 shift reduce 错误和 reduce reduce 但我可以正常创建我的 .exe 文件,如果我 运行 它显示了这个。
我试过减少我的大部分 shift/reduces 都无济于事。那真的是问题所在吗?因为我认为它不会给我 .exe
.l 文件
%{
#include <stdio.h>
#include <stdlib.h>
#include "sym_tab.h"
#include "define.h"
FILE *new_file;
int stringtoint;
int current_indent = 0;
void count();
void comment();
int count_indent();
%}
L [A-Za-z]
D [0-9]
N [1-9]
C "%"|"!"|"@"|"$"|"%"|"^"|"&"|"_"
identifier {L}({L}|{D})*
dec_const (-|\+)*(0|{N}{D}*)
blank [ \v\f]+
invalid_identifier {D}|{C}(({L}|{D})*|{L})
invalid_keyword {C}({L}|{D})+
block_count ^[\t]+
%%
"while" {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(WHILE); }
"for" {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(FOR); }
"in range" {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(IN_RANGE); }
"input" {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(INPUT); }
"print" {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(PRINT); }
"if" {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(IF); }
"elif" {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(ELIF); }
"else" {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(ELSE); }
"and" {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(AND); }
"not" {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(NOT); }
"or" {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(OR); }
"return" {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(RETURN); }
"exit" {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(EXIT); }
"def" {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(DEF); }
L?\"(\.|[^\"])*\" {char *yycopy=strdup(yytext); count(); printf("found literal string %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(STRING_LITERAL); }
{dec_const} {char *yycopy=strdup(yytext); count(); stringtoint=atoi(yycopy);if(stringtoint<(-32768)|| stringtoint>32767){
printf("dec_const %d in line %d not an acceptable value\n",stringtoint,line);}else{
printf("found dec_constant %s at line %d\n", yycopy,line);
addsym( yycopy, block_num ); return(DEC_CONST);}}
{identifier} {char *yycopy=strdup(yytext); count(); if(strlen(yycopy)>20){
printf("identifier %s in line %d not valid(longer than 20 characters)\n",yycopy,line);}
else{printf("found identifier %s at line %d\n", yycopy,line);
addsym( yycopy, block_num ); return(IDENTIFIER);}}
"+" {char *yycopy=strdup(yytext); count(); printf("found symbol %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(PLUS);}
"-" {char *yycopy=strdup(yytext); count(); printf("found symbol %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(MINUS);}
"*" {char *yycopy=strdup(yytext); count(); printf("found symbol %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(STAR);}
"/" {char *yycopy=strdup(yytext); count(); printf("found symbol %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(DIV);}
"<" {char *yycopy=strdup(yytext); count(); printf("found equation_symbol %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(L_THAN);}
">" {char *yycopy=strdup(yytext); count(); printf("found equation_symbol %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(G_THAN);}
"==" {char *yycopy=strdup(yytext); count(); printf("found equation_symbol %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(EQUAL);}
"<=" {char *yycopy=strdup(yytext); count(); printf("found equation_symbol %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(L_EQ_THAN);}
">=" {char *yycopy=strdup(yytext); count(); printf("found equation_symbol %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(G_EQ_THAN);}
"<>" {char *yycopy=strdup(yytext); count(); printf("found equation_symbol %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(NEQUAL);}
":=" {char *yycopy=strdup(yytext); count(); printf("found asign_symbol %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(ASSIGN);}
"(" {char *yycopy=strdup(yytext); count(); printf("found %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(LPAREN);}
")" {char *yycopy=strdup(yytext); count(); printf("found %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(RPAREN);}
"[" {char *yycopy=strdup(yytext); count(); printf("found %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(LSQUARE_BRACK);}
"]" {char *yycopy=strdup(yytext); count(); printf("found %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(RSQUARE_BRACK);}
"\n" {char *yycopy=strdup(yytext); count(); printf("found new line at line %d\n" , line);
addsym( yycopy, block_num ); return(END_LINE);}
"," {char *yycopy=strdup(yytext); count(); printf("found %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(COMMA);}
":" {char *yycopy=strdup(yytext); count(); printf("found %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(COLON);}
"#" { comment();}
{blank} { count();}
{invalid_keyword} {char *yycopy=strdup(yytext); count(); printf("invalid keyword %s at line %d\n", yycopy, line);
addsym( yycopy, block_num );}
{invalid_identifier} {char *yycopy=strdup(yytext); count(); printf("invalid identifier %s at line %d\n", yycopy, line);
addsym( yycopy, block_num );}
. {char *yycopy=strdup(yytext); count(); printf("unexpected character %s at line %d\n", yycopy, line);
addsym( yycopy, block_num );}
%%
int yywrap()
{
return 1;
}
// void main(int argc, char *argv[]){
// int ret_val=1;
//
// if (argc!=2) printf("\nUsage: lexyy <input file name> \n");
// else
// if ((new_file=fopen(argv[1],"r"))==NULL)
// printf("\n<%s> not found.\n",argv[1]);
// else{
// yyrestart(new_file);
// while(ret_val!=0){
// ret_val=yylex();
// }
// fclose(new_file);
// }
//}
void count()
{
int i;
for(i=0;yytext[i]!='[=11=]';i++)
if(yytext[i]=='\n')
{
line++;
}
}
int count_indent()
{
int i;
int tab_num = 0;
for(i=0;yytext[i]=='\t';i++)
{
tab_num++;
}
return tab_num;
}
void comment()
{
int c;
while(c=input()!='\n' && c!=EOF)
{
}
line++;
}
.y 文件
%{
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include <ctype.h>
#include "y.tab.h"
extern int yylex();
extern FILE *yyin;
%}
%token L_THAN G_THAN EQUAL L_EQ_THAN G_EQ_THAN NEQUAL
%token ASSIGN
%token LPAREN RPAREN LSQUARE_BRACK RSQUARE_BRACK
%token END_LINE COMMA COLON
%token INDENT DEDENT
%start Program
%%
Program: Block Program
| Empty
;
Empty: /* empty */
;
Block: Declarations
| Subprograms
| Sequence
;
%%
extern int column;
int main(int argc, char *argv[])
{
yyin = fopen("test_code.sy", "r");
if(yyparse()==1)
printf("\nParsing failed\n\n");
else
printf("\nParsing completed successfully\n");
fclose(yyin);
return 0;
}
int yyerror(s)
char *s;
{
printf("%s\n", s);
fflush(stdout);
return 1;
}
**编辑:**sym_tab.h 文件
/*#include <iostream.h>*/
#include <stdio.h>
#include <malloc.h>
#include <string.h>
#define table_size 100
extern int line=1;
extern int end_file=1;
extern int block_num=0;
extern FILE *new_file;
typedef struct hash_sym
{
struct hash_sym *prev, *next;
char *nam;
char *str_val;
char *id_type;
int id_value;
int block_num;
} Hashing_table;
Hashing_table *table[table_size];
int hash_funct( char str[], int hash_size);
addsym( sym, bloc_num )
register char sym[];
int bloc_num;
{
int hash_val = hash_funct( sym, table_size );
register struct hash_sym *sym_sym = table[hash_val];
register struct hash_sym *new_sym;
register struct hash_sym *successor;
while ( sym_sym!=0 )
{
if ( strcmp( sym, table[hash_val]->nam )==0 )
{
printf("the entry %s at line %d already exists at symbol table\n", sym,line);
return -1;
}
sym_sym = sym_sym->next;
}
new_sym = (struct hash_sym *)
malloc( sizeof( struct hash_sym ) );
if ( (successor = table[hash_val]) )
{
new_sym->next = successor;
successor->prev = new_sym;
}
else
new_sym->next = NULL;
new_sym->prev = NULL;
new_sym->nam = sym;
new_sym->block_num = bloc_num;
table[hash_val] = new_sym;
return 0;
}
int hash_funct( str, hash_size )
register char str[];
int hash_size;
{
register int hashval;
register int i;
hashval = 0;
i = 0;
while ( str[i]!='[=13=]' )
{
hashval = hashval + str[i++]*(16+i);
/*hashval %= hash_size;*/
}
return (hashval %= hash_size);
}
我希望解析器解析到文件末尾。我还没有在我的 .y 文件中设置印刷品,所以我不希望那里有印刷品。
基本问题是您的语法包含任意重复的非终结符,它可以匹配空字符串。
这总是模棱两可的,因为不可能区分一个空字符串和两个连续的空字符串,或者实际上是一百万个连续的空字符串。因此,重复可空非终结符总是会产生冲突。大多数时候,生成的解析器只是不正确,但它仍然会终止。 Bison 通过选择移位来解决 shift/reduce 冲突,这保证了解析器将通过输入进行处理。实际上,它用尽可能小的答案解决了问题 "how many empty strings are there",通常是 "one".
但在您的情况下,重复有不止一种选择,其中有几种可以为空。现在解析器有一个更难的问题:它必须弄清楚空字符串应该匹配哪个非终端。这是一个 reduce/reduce 冲突,bison 的解决方案是始终选择语法中最先出现的非终结符。如果特定输入的正确选择是其他一些非终端,那将是一个问题。
下面是我所说内容的一个最小示例:
%%
list: %empty | unit list
unit: as | bs
as: %empty | as 'a'
bs: %empty | bs 'b'
在这里,一个unit
可以是零个或多个a
,或者零个或多个b
。由于零个 a
和零个 b
看起来相同,解析器真的无法从语法中判断选择哪个,所以它总是选择零 a
(因为它先出现在语法中)。当输入包含 b
时,问题就来了。由于解析器从不使用规则 bs: %empty
(实际上,bison 会警告您这一点),因此它永远不会应用规则 bs: bs 'b'
。因此面对 b
,解析器减少一个空的 as
,将其变成 unit
,将其添加到 list
,然后尝试解析另一个 unit
.然而,一切都没有改变;没有读取任何标记,因此前瞻仍然是 b
。因此解析器将进入无限循环,一遍又一遍地解析包含空 as
的空 unit
。
有了上面写的 list
产生式(正确递归),这些空的 unit
需要添加到解析器堆栈。因此,解析器最终将 运行 超出其堆栈的 space,并因 "memory exhausted" 错误而死。如果改成左递归(list: %empty | list unit
),那么解析器就不需要使用栈space,可以一直解析空的unit
]
我建议您使用 bison 非常有用的跟踪功能尝试上面的简单示例(请参阅 Bison 手册中的 "Debugging your parser")。这比用 printf
调用填充语法文件要容易得多,而且信息量也大得多。
要解决这个问题,你只需要要求 unit
是非空的,这样就避免了“重复空字符串”的问题。如果每个 unit
都需要匹配一些东西,那么语法匹配相同的语言,但它是明确的。
我正在用类似 python 的语言创建一个 bison 语法,当 运行 设置我的测试代码文件时我得到的输出是这样的:
found identifier a at line 2
memory exhausted
Parsing completed successfully
我遇到了一些 shift reduce 错误和 reduce reduce 但我可以正常创建我的 .exe 文件,如果我 运行 它显示了这个。
我试过减少我的大部分 shift/reduces 都无济于事。那真的是问题所在吗?因为我认为它不会给我 .exe
.l 文件
%{
#include <stdio.h>
#include <stdlib.h>
#include "sym_tab.h"
#include "define.h"
FILE *new_file;
int stringtoint;
int current_indent = 0;
void count();
void comment();
int count_indent();
%}
L [A-Za-z]
D [0-9]
N [1-9]
C "%"|"!"|"@"|"$"|"%"|"^"|"&"|"_"
identifier {L}({L}|{D})*
dec_const (-|\+)*(0|{N}{D}*)
blank [ \v\f]+
invalid_identifier {D}|{C}(({L}|{D})*|{L})
invalid_keyword {C}({L}|{D})+
block_count ^[\t]+
%%
"while" {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(WHILE); }
"for" {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(FOR); }
"in range" {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(IN_RANGE); }
"input" {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(INPUT); }
"print" {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(PRINT); }
"if" {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(IF); }
"elif" {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(ELIF); }
"else" {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(ELSE); }
"and" {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(AND); }
"not" {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(NOT); }
"or" {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(OR); }
"return" {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(RETURN); }
"exit" {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(EXIT); }
"def" {char *yycopy=strdup(yytext); count(); printf("found keyword %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(DEF); }
L?\"(\.|[^\"])*\" {char *yycopy=strdup(yytext); count(); printf("found literal string %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(STRING_LITERAL); }
{dec_const} {char *yycopy=strdup(yytext); count(); stringtoint=atoi(yycopy);if(stringtoint<(-32768)|| stringtoint>32767){
printf("dec_const %d in line %d not an acceptable value\n",stringtoint,line);}else{
printf("found dec_constant %s at line %d\n", yycopy,line);
addsym( yycopy, block_num ); return(DEC_CONST);}}
{identifier} {char *yycopy=strdup(yytext); count(); if(strlen(yycopy)>20){
printf("identifier %s in line %d not valid(longer than 20 characters)\n",yycopy,line);}
else{printf("found identifier %s at line %d\n", yycopy,line);
addsym( yycopy, block_num ); return(IDENTIFIER);}}
"+" {char *yycopy=strdup(yytext); count(); printf("found symbol %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(PLUS);}
"-" {char *yycopy=strdup(yytext); count(); printf("found symbol %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(MINUS);}
"*" {char *yycopy=strdup(yytext); count(); printf("found symbol %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(STAR);}
"/" {char *yycopy=strdup(yytext); count(); printf("found symbol %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(DIV);}
"<" {char *yycopy=strdup(yytext); count(); printf("found equation_symbol %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(L_THAN);}
">" {char *yycopy=strdup(yytext); count(); printf("found equation_symbol %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(G_THAN);}
"==" {char *yycopy=strdup(yytext); count(); printf("found equation_symbol %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(EQUAL);}
"<=" {char *yycopy=strdup(yytext); count(); printf("found equation_symbol %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(L_EQ_THAN);}
">=" {char *yycopy=strdup(yytext); count(); printf("found equation_symbol %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(G_EQ_THAN);}
"<>" {char *yycopy=strdup(yytext); count(); printf("found equation_symbol %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(NEQUAL);}
":=" {char *yycopy=strdup(yytext); count(); printf("found asign_symbol %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(ASSIGN);}
"(" {char *yycopy=strdup(yytext); count(); printf("found %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(LPAREN);}
")" {char *yycopy=strdup(yytext); count(); printf("found %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(RPAREN);}
"[" {char *yycopy=strdup(yytext); count(); printf("found %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(LSQUARE_BRACK);}
"]" {char *yycopy=strdup(yytext); count(); printf("found %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(RSQUARE_BRACK);}
"\n" {char *yycopy=strdup(yytext); count(); printf("found new line at line %d\n" , line);
addsym( yycopy, block_num ); return(END_LINE);}
"," {char *yycopy=strdup(yytext); count(); printf("found %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(COMMA);}
":" {char *yycopy=strdup(yytext); count(); printf("found %s at line %d\n" ,yycopy, line);
addsym( yycopy, block_num ); return(COLON);}
"#" { comment();}
{blank} { count();}
{invalid_keyword} {char *yycopy=strdup(yytext); count(); printf("invalid keyword %s at line %d\n", yycopy, line);
addsym( yycopy, block_num );}
{invalid_identifier} {char *yycopy=strdup(yytext); count(); printf("invalid identifier %s at line %d\n", yycopy, line);
addsym( yycopy, block_num );}
. {char *yycopy=strdup(yytext); count(); printf("unexpected character %s at line %d\n", yycopy, line);
addsym( yycopy, block_num );}
%%
int yywrap()
{
return 1;
}
// void main(int argc, char *argv[]){
// int ret_val=1;
//
// if (argc!=2) printf("\nUsage: lexyy <input file name> \n");
// else
// if ((new_file=fopen(argv[1],"r"))==NULL)
// printf("\n<%s> not found.\n",argv[1]);
// else{
// yyrestart(new_file);
// while(ret_val!=0){
// ret_val=yylex();
// }
// fclose(new_file);
// }
//}
void count()
{
int i;
for(i=0;yytext[i]!='[=11=]';i++)
if(yytext[i]=='\n')
{
line++;
}
}
int count_indent()
{
int i;
int tab_num = 0;
for(i=0;yytext[i]=='\t';i++)
{
tab_num++;
}
return tab_num;
}
void comment()
{
int c;
while(c=input()!='\n' && c!=EOF)
{
}
line++;
}
.y 文件
%{
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include <ctype.h>
#include "y.tab.h"
extern int yylex();
extern FILE *yyin;
%}
%token L_THAN G_THAN EQUAL L_EQ_THAN G_EQ_THAN NEQUAL
%token ASSIGN
%token LPAREN RPAREN LSQUARE_BRACK RSQUARE_BRACK
%token END_LINE COMMA COLON
%token INDENT DEDENT
%start Program
%%
Program: Block Program
| Empty
;
Empty: /* empty */
;
Block: Declarations
| Subprograms
| Sequence
;
%%
extern int column;
int main(int argc, char *argv[])
{
yyin = fopen("test_code.sy", "r");
if(yyparse()==1)
printf("\nParsing failed\n\n");
else
printf("\nParsing completed successfully\n");
fclose(yyin);
return 0;
}
int yyerror(s)
char *s;
{
printf("%s\n", s);
fflush(stdout);
return 1;
}
**编辑:**sym_tab.h 文件
/*#include <iostream.h>*/
#include <stdio.h>
#include <malloc.h>
#include <string.h>
#define table_size 100
extern int line=1;
extern int end_file=1;
extern int block_num=0;
extern FILE *new_file;
typedef struct hash_sym
{
struct hash_sym *prev, *next;
char *nam;
char *str_val;
char *id_type;
int id_value;
int block_num;
} Hashing_table;
Hashing_table *table[table_size];
int hash_funct( char str[], int hash_size);
addsym( sym, bloc_num )
register char sym[];
int bloc_num;
{
int hash_val = hash_funct( sym, table_size );
register struct hash_sym *sym_sym = table[hash_val];
register struct hash_sym *new_sym;
register struct hash_sym *successor;
while ( sym_sym!=0 )
{
if ( strcmp( sym, table[hash_val]->nam )==0 )
{
printf("the entry %s at line %d already exists at symbol table\n", sym,line);
return -1;
}
sym_sym = sym_sym->next;
}
new_sym = (struct hash_sym *)
malloc( sizeof( struct hash_sym ) );
if ( (successor = table[hash_val]) )
{
new_sym->next = successor;
successor->prev = new_sym;
}
else
new_sym->next = NULL;
new_sym->prev = NULL;
new_sym->nam = sym;
new_sym->block_num = bloc_num;
table[hash_val] = new_sym;
return 0;
}
int hash_funct( str, hash_size )
register char str[];
int hash_size;
{
register int hashval;
register int i;
hashval = 0;
i = 0;
while ( str[i]!='[=13=]' )
{
hashval = hashval + str[i++]*(16+i);
/*hashval %= hash_size;*/
}
return (hashval %= hash_size);
}
我希望解析器解析到文件末尾。我还没有在我的 .y 文件中设置印刷品,所以我不希望那里有印刷品。
基本问题是您的语法包含任意重复的非终结符,它可以匹配空字符串。
这总是模棱两可的,因为不可能区分一个空字符串和两个连续的空字符串,或者实际上是一百万个连续的空字符串。因此,重复可空非终结符总是会产生冲突。大多数时候,生成的解析器只是不正确,但它仍然会终止。 Bison 通过选择移位来解决 shift/reduce 冲突,这保证了解析器将通过输入进行处理。实际上,它用尽可能小的答案解决了问题 "how many empty strings are there",通常是 "one".
但在您的情况下,重复有不止一种选择,其中有几种可以为空。现在解析器有一个更难的问题:它必须弄清楚空字符串应该匹配哪个非终端。这是一个 reduce/reduce 冲突,bison 的解决方案是始终选择语法中最先出现的非终结符。如果特定输入的正确选择是其他一些非终端,那将是一个问题。
下面是我所说内容的一个最小示例:
%%
list: %empty | unit list
unit: as | bs
as: %empty | as 'a'
bs: %empty | bs 'b'
在这里,一个unit
可以是零个或多个a
,或者零个或多个b
。由于零个 a
和零个 b
看起来相同,解析器真的无法从语法中判断选择哪个,所以它总是选择零 a
(因为它先出现在语法中)。当输入包含 b
时,问题就来了。由于解析器从不使用规则 bs: %empty
(实际上,bison 会警告您这一点),因此它永远不会应用规则 bs: bs 'b'
。因此面对 b
,解析器减少一个空的 as
,将其变成 unit
,将其添加到 list
,然后尝试解析另一个 unit
.然而,一切都没有改变;没有读取任何标记,因此前瞻仍然是 b
。因此解析器将进入无限循环,一遍又一遍地解析包含空 as
的空 unit
。
有了上面写的 list
产生式(正确递归),这些空的 unit
需要添加到解析器堆栈。因此,解析器最终将 运行 超出其堆栈的 space,并因 "memory exhausted" 错误而死。如果改成左递归(list: %empty | list unit
),那么解析器就不需要使用栈space,可以一直解析空的unit
我建议您使用 bison 非常有用的跟踪功能尝试上面的简单示例(请参阅 Bison 手册中的 "Debugging your parser")。这比用 printf
调用填充语法文件要容易得多,而且信息量也大得多。
要解决这个问题,你只需要要求 unit
是非空的,这样就避免了“重复空字符串”的问题。如果每个 unit
都需要匹配一些东西,那么语法匹配相同的语言,但它是明确的。