添加语义操作后 Lex Yacc 语法错误
Lex Yacc syntax error after adding semantic actions
我正在为一个学校项目使用 Lex&Yacc 制作一个解析器,我的语法分析有一些无法解释的问题。
首先,这是我的 yacc 文件,它不起作用。
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
extern FILE *yyin;
extern char* yytext;
%}
%token <code> IDENTIFIER CONSTANT SIZEOF
%token <code> PTR_OP LE_OP GE_OP LES_OP GES_OP EQ_OP NE_OP
%token <code> AND_OP OR_OP
%token <code> EXTERN
%token <code> INT VOID
%token <code> STRUCT
%token <code> IF ELSE WHILE FOR RETURN
%union
{
int number;
char* code;
}
%start program
%type <code> primary_expression postfix_expression argument_expression_list unary_expression
%type <code> unary_operator multiplicative_expression additive_expression relational_expression
%type <code> equality_expression logical_and_expression logical_or_expression expression
%type <code> declaration declaration_specifiers type_specifier struct_specifier
%type <code> struct_declaration_list struct_declaration declarator direct_declarator
%type <code> parameter_list parameter_declaration statement compound_statement
%type <code> declaration_list statement_list expression_statement selection_statement
%type <code> iteration_statement jump_statement program external_declaration
%type <code> function_definition
%%
primary_expression
: IDENTIFIER {strcpy($$,);}
| CONSTANT {strcpy($$,);}
| '(' expression ')' {
char* temp = (char*)malloc((2 + strlen())*sizeof(char));
sprintf(temp,"(%s)",);
strcpy($$,temp);
free(temp);
}
| SIZEOF '(' type_specifier ')' {
char* temp = (char*)malloc((8 + strlen())*sizeof(char));
sprintf(temp,"sizeof(%s)",);
strcpy($$,temp);
free(temp);
}
| SIZEOF '(' IDENTIFIER ')' {
char* temp = (char*)malloc((8 + strlen())*sizeof(char));
sprintf(temp,"sizeof(%s)",);
strcpy($$,temp);
free(temp);
}
;
postfix_expression
: primary_expression {strcpy($$,);}
| postfix_expression '(' ')' {
char* temp = (char*)malloc((2 + strlen())*sizeof(char));
sprintf(temp,"%s()",);
strcpy($$,temp);
free(temp);
}
| postfix_expression '(' argument_expression_list ')' {
char* temp = (char*)malloc((2 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s(%s)",,);
strcpy($$,temp);
free(temp);
}
| postfix_expression PTR_OP IDENTIFIER {
char* temp = (char*)malloc((2 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s->%s",,);
strcpy($$,temp);
free(temp);
}
;
argument_expression_list
: expression {strcpy($$,);}
| argument_expression_list ',' expression {
char* temp = (char*)malloc((1 + strlen()+strlen())*sizeof(char));
sprintf(temp,"%s,%s",,);
strcpy($$,temp);
free(temp);
}
;
unary_expression
: postfix_expression {strcpy($$,);}
| unary_operator unary_expression {
char* temp = (char*)malloc((strlen() + strlen())*sizeof(char));
sprintf(temp,"%s%s",,);
strcpy($$,temp);
free(temp);}
;
unary_operator
: '&' {strcpy($$,"&");}
| '*' {strcpy($$,"*");}
| '-' {strcpy($$,"-");}
;
multiplicative_expression
: unary_expression {strcpy($$,);}
| multiplicative_expression '*' unary_expression {
char* temp = (char*)malloc((1 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s*%s",,);
strcpy($$,temp);
free(temp);
}
| multiplicative_expression '/' unary_expression {
char* temp = (char*)malloc((1 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s/%s",,);
strcpy($$,temp);
free(temp);
}
;
additive_expression
: multiplicative_expression {strcpy($$,);}
| additive_expression '+' multiplicative_expression {
char* temp = (char*)malloc((1 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s+%s",,);
strcpy($$,temp);
free(temp);
}
| additive_expression '-' multiplicative_expression {
char* temp = (char*)malloc((1 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s-%s",,);
strcpy($$,temp);
free(temp);
}
;
relational_expression
: additive_expression {strcpy($$,);}
| relational_expression LES_OP additive_expression {
char* temp = (char*)malloc((2 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s<=%s",,);
strcpy($$,temp);
free(temp);
}
| relational_expression GES_OP additive_expression {
char* temp = (char*)malloc((2 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s>=%s",,);
strcpy($$,temp);
free(temp);
}
| relational_expression LE_OP additive_expression {
char* temp = (char*)malloc((1 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s<%s",,);
strcpy($$,temp);
free(temp);
}
| relational_expression GE_OP additive_expression {
char* temp = (char*)malloc((1 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s>%s",,);
strcpy($$,temp);
free(temp);
}
;
equality_expression
: relational_expression {strcpy($$,);}
| equality_expression EQ_OP relational_expression {
char* temp = (char*)malloc((2 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s==%s",,);
strcpy($$,temp);
free(temp);
}
| equality_expression NE_OP relational_expression {
char* temp = (char*)malloc((2 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s!=%s",,);
strcpy($$,temp);
free(temp);
}
;
logical_and_expression
: equality_expression {strcpy($$,);}
| logical_and_expression AND_OP equality_expression {
char* temp = (char*)malloc((2 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s&&%s",,);
strcpy($$,temp);
free(temp);
}
;
logical_or_expression
: logical_and_expression {strcpy($$,);}
| logical_or_expression OR_OP logical_and_expression {
char* temp = (char*)malloc((2 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s||%s",,);
strcpy($$,temp);
free(temp);
}
;
expression
: logical_or_expression {strcpy($$,);}
| unary_expression '=' expression {
char* temp = (char*)malloc((1 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s=%s",,);
strcpy($$,temp);
free(temp);
}
;
declaration
: declaration_specifiers declarator ';' {
char* temp = (char*)malloc((1 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s%s;",,);
strcpy($$,temp);
free(temp);
}
| struct_specifier ';' {
char* temp = (char*)malloc((1 + strlen())*sizeof(char));
sprintf(temp,"%s;",);
strcpy($$,temp);
free(temp);
}
;
declaration_specifiers
: EXTERN type_specifier {
char* temp = (char*)malloc((strlen()+ strlen())*sizeof(char));
sprintf(temp,"%s %s",,);
strcpy($$,temp);
free(temp);
}
| type_specifier {strcpy($$,);}
;
type_specifier
: VOID {strcpy($$,);}
| INT {strcpy($$,);}
| struct_specifier {strcpy($$,);}
;
struct_specifier
: STRUCT IDENTIFIER '{' struct_declaration_list '}' {
char* temp = (char*)malloc((2 + strlen() + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s %s {%s}",,,);
strcpy($$,temp);
free(temp);
}
| STRUCT '{' struct_declaration_list '}' {
char* temp = (char*)malloc((2 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s {%s}",,);
strcpy($$,temp);
free(temp);
}
| STRUCT IDENTIFIER {
char* temp = (char*)malloc((strlen() + strlen())*sizeof(char));
sprintf(temp,"%s %s",,);
strcpy($$,temp);
free(temp);
}
;
struct_declaration_list
: struct_declaration {strcpy($$,);}
| struct_declaration_list struct_declaration {
char* temp = (char*)malloc((strlen() + strlen())*sizeof(char));
sprintf(temp,"%s %s",,);
strcpy($$,temp);
free(temp);
}
;
struct_declaration
: type_specifier declarator ';' {
char* temp = (char*)malloc((1 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s %s;",,);
strcpy($$,temp);
free(temp);
}
;
declarator
: '*' direct_declarator {
char* temp = (char*)malloc((1 + strlen())*sizeof(char));
sprintf(temp,"*%s",);
strcpy($$,temp);
free(temp);
}
| direct_declarator {strcpy($$,);}
;
direct_declarator
: IDENTIFIER {strcpy($$,);}
| '(' declarator ')' {
char* temp = (char*)malloc((2 + strlen())*sizeof(char));
sprintf(temp,"(%s)",);
strcpy($$,temp);
free(temp);
}
| direct_declarator '(' parameter_list ')' {
char* temp = (char*)malloc((2 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s(%s)",,);
strcpy($$,temp);
free(temp);
}
| direct_declarator '(' ')' {
char* temp = (char*)malloc((2 + strlen())*sizeof(char));
sprintf(temp,"%s()",);
strcpy($$,temp);
free(temp);
}
;
parameter_list
: parameter_declaration {strcpy($$,);}
| parameter_list ',' parameter_declaration {
char* temp = (char*)malloc((1 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s, %s",,);
strcpy($$,temp);
free(temp);
}
;
parameter_declaration
: declaration_specifiers declarator {
char* temp = (char*)malloc((strlen() + strlen())*sizeof(char));
sprintf(temp,"%s %s",,);
strcpy($$,temp);
free(temp);
}
;
statement
: compound_statement {strcpy($$,);}
| expression_statement {strcpy($$,);}
| selection_statement {strcpy($$,);}
| iteration_statement {strcpy($$,);}
| jump_statement {strcpy($$,);}
;
compound_statement
: '{' '}' {strcpy($$,"{}");}
| '{' statement_list '}' {
char* temp = (char*)malloc((2 + strlen())*sizeof(char));
sprintf(temp,"{%s}",);
strcpy($$,temp);
free(temp);
}
| '{' declaration_list '}' {
char* temp = (char*)malloc((2 + strlen())*sizeof(char));
sprintf(temp,"{%s}",);
strcpy($$,temp);
free(temp);
}
| '{' declaration_list statement_list '}' {
char* temp = (char*)malloc((2 + strlen() + strlen())*sizeof(char));
sprintf(temp,"{%s%s}",,);
strcpy($$,temp);
free(temp);
}
;
declaration_list
: declaration {strcpy($$,);}
| declaration_list declaration {
char* temp = (char*)malloc((strlen() + strlen())*sizeof(char));
sprintf(temp,"%s %s",,);
strcpy($$,temp);
free(temp);
}
;
statement_list
: statement {strcpy($$,);}
| statement_list statement {
char* temp = (char*)malloc((strlen() + strlen())*sizeof(char));
sprintf(temp,"%s %s",,);
strcpy($$,temp);
free(temp);
}
;
expression_statement
: ';' {strcpy($$,";");}
| expression ';' {
char* temp = (char*)malloc((1 + strlen())*sizeof(char));
sprintf(temp,"%s;",);
strcpy($$,temp);
free(temp);
}
;
selection_statement
: IF '(' expression ')' statement {
char* temp = (char*)malloc((2 + strlen() + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s (%s) %s",,,);
strcpy($$,temp);
free(temp);
}
| IF '(' expression ')' statement ELSE statement {
char* temp = (char*)malloc((2 + strlen() + strlen() + strlen() + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s (%s) %s %s %s",,,,,);
strcpy($$,temp);
free(temp);
}
;
iteration_statement
: WHILE '(' expression ')' statement {
char* temp = (char*)malloc((1 + strlen() + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s (%s) %s",,,);
strcpy($$,temp);
free(temp);
}
| FOR '(' expression_statement expression_statement expression ')' statement {
char* temp = (char*)malloc((2 + strlen() + strlen() + strlen() + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s (%s %s %s) %s",,,,,);
strcpy($$,temp);
free(temp);
}
;
jump_statement
: RETURN ';' {strcpy($$,);}
| RETURN expression ';' {
char* temp = (char*)malloc((strlen() + strlen())*sizeof(char));
sprintf(temp,"%s %s;",,);
strcpy($$,temp);
}
;
program
: external_declaration {strcpy($$,);}
| program external_declaration {
char* temp = (char*)malloc((strlen() + strlen())*sizeof(char));
sprintf(temp,"%s %s",,);
strcpy($$,temp);
free(temp);
}
;
external_declaration
: function_definition {strcpy($$,);}
| declaration {strcpy($$,);}
;
function_definition
: declaration_specifiers declarator compound_statement {
char* temp = (char*)malloc((strlen() + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s %s %s",,,);
strcpy($$,temp);
free(temp);
}
;
%%
int yyerror(char *s) {
printf("%s\n",s);
printf("%s",yytext);
}
int main(int argc, char *argv[]) {
yyin = fopen(argv[1],"r");
yyparse();
return 0;
}
这个解析器的最终目的是生成一个带有初始代码的 3 地址代码(接近于旧的 C 语言)。
现在,我只是创建一个简单的解析器来测试输入文件是否以我的第一语言正确编写。
在我的实际代码中,这里的语义操作是为了构建与输入相同的代码,然后将其写入外部文件。
这对你来说可能看起来完全是白痴,但这是我的第一个编译器项目,我认为如果我想在未来生成代码(因为我只想要代码翻译),这对我来说是至关重要的一步。
这就是问题所在。在执行我的语义动作之前,解析器可以完美地解析测试文件,没有语法错误。但是现在,它不再起作用了。
语义动作不影响语法分析,我们确定吗?
PS:
这是我的 lex 文件:
chiffre [0-9]
lettre [a-zA-Z]
exposant [Ee][+-]?{D}+
commentaire [\/][\*][^\/\*]*[\*][\/]
identificateur {lettre}({lettre}|_|{chiffre})*
entier {chiffre}+
pointeur [-][>]
%{
#include <stdio.h>
#include "y.tab.h"
/* Ne gère pas les commentaires. A rajouter */
/* Supprimer les lignes dont vous n'avez pas besoin. */
%}
%%
"else" {yylval.code=yytext;return ELSE;}
"extern" {yylval.code=yytext;return EXTERN;}
"for" {yylval.code=yytext;return FOR;}
"if" {yylval.code=yytext;return IF;}
"int" {yylval.code=yytext;return INT;}
"return" {yylval.code=yytext;return RETURN;}
"sizeof" {yylval.code=yytext;return SIZEOF;}
"struct" {yylval.code=yytext;return STRUCT;}
"void" {yylval.code=yytext;return VOID;}
"while" {yylval.code=yytext;return WHILE;}
{entier} {yylval.code=yytext;return CONSTANT; }
{identificateur} {yylval.code=yytext;return IDENTIFIER;}
"<" {yylval.code=yytext;return LE_OP;}
">" {yylval.code=yytext;return GE_OP;}
"<=" {yylval.code=yytext;return LES_OP;}
">=" {yylval.code=yytext;return GES_OP;}
"==" {yylval.code=yytext;return EQ_OP;}
"!=" {yylval.code=yytext;return NE_OP;}
"&&" {yylval.code=yytext;return AND_OP;}
"||" {yylval.code=yytext;return OR_OP;}
"*" {yylval.code=yytext;return '*';}
"-" {yylval.code=yytext;return '-';}
"&" {yylval.code=yytext;return '&';}
{pointeur} {yylval.code=yytext;return PTR_OP;}
"+" {yylval.code=yytext;return '+';}
"/" {yylval.code=yytext;return '/';}
{commentaire} {/* commentaire ne rien faire */}
"," {yylval.code=yytext;return ',';}
";" {yylval.code=yytext;return ';';}
"(" {yylval.code=yytext;return '(';}
")" {yylval.code=yytext;return ')';}
"{" {yylval.code=yytext;return '{';}
"}" {yylval.code=yytext;return '}';}
"=" {yylval.code=yytext;return '=';}
[ \t\n\v\r]+ { /* rien faire */ }
. { printf("erreur : b");printf("%s",yytext);printf("a\n");printf("Erreur lexicale. \n"); }
%%
最后是我的测试文件之一
extern int printd( int i );
int main() {
int i;
int j;
i = 45000;
j = -123;
printd(i+j);
printd(45000+j);
printd(i+123);
printd(45000+123);
printd(i+(j+0));
printd((i+0)+j);
printd((i+0)+(j+0));
printd((i+0)+123);
printd(45000+(j+0));
return 0;
}
进行这些更改后,解析器会在第一个 "int" 关键字处阻塞。
感谢您的帮助...
您的字符串处理需要改进。
这是永远不正确的:
yylval.code=yytext;
yytext
指向扫描仪用来保存部分输入的内部临时缓冲区,其内容会发生变化。它甚至可能被重新分配。
如果您需要保留与令牌对应的字符串,则需要制作一个动态分配的副本(并在不再需要时释放该副本)。对于关键字和运算符标记,这可能被认为是不必要的开销;您可以只使用令牌号作为字符串文字查找 table 中的键,或其他一些复制避免机制。
此外,您在解析器中的操作包括如下调用:
strcpy($$, );
但是 strcpy
要求目标是指向已知至少与源一样长的有效字符数组的指针。由于您没有初始化 $$
,这可能被认为是不合理的期望。恰好,$$
被预初始化为$$
,所以上面相当于strcpy(, )
,这是一个错误,不是空操作:strcpy
要求源和目的地不重叠。
一个特别恶劣的例子发生在 declaration_specifiers 的行动中:
char* temp = (char*)malloc((strlen()+ strlen())*sizeof(char));
sprintf(temp,"%s %s",,);
strcpy($$,temp);
free(temp);
首先,根据关于在您的扫描程序操作中使用 yytext
的观点,</code> 和 <code>
目前不太可能包含有用的信息。其次,$$
与 </code> 相同,因此它指向扫描器的内部缓冲区。这意味着 <code>strcpy
将用随机文本覆盖扫描器的内部缓冲区,可能会超出它。没有什么好处。
顺便说一下,用于创建 temp
的 malloc
的长度计算有误。应该是:
char* temp = malloc(strlen() + strlen() + 2);
因为 space 字符和 NUL 终止符。所以这是另一个缓冲区溢出。 (并且 temp
是不必要的;您应该直接使用 $$
来保存新分配的字符串的地址。)
我正在为一个学校项目使用 Lex&Yacc 制作一个解析器,我的语法分析有一些无法解释的问题。
首先,这是我的 yacc 文件,它不起作用。
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
extern FILE *yyin;
extern char* yytext;
%}
%token <code> IDENTIFIER CONSTANT SIZEOF
%token <code> PTR_OP LE_OP GE_OP LES_OP GES_OP EQ_OP NE_OP
%token <code> AND_OP OR_OP
%token <code> EXTERN
%token <code> INT VOID
%token <code> STRUCT
%token <code> IF ELSE WHILE FOR RETURN
%union
{
int number;
char* code;
}
%start program
%type <code> primary_expression postfix_expression argument_expression_list unary_expression
%type <code> unary_operator multiplicative_expression additive_expression relational_expression
%type <code> equality_expression logical_and_expression logical_or_expression expression
%type <code> declaration declaration_specifiers type_specifier struct_specifier
%type <code> struct_declaration_list struct_declaration declarator direct_declarator
%type <code> parameter_list parameter_declaration statement compound_statement
%type <code> declaration_list statement_list expression_statement selection_statement
%type <code> iteration_statement jump_statement program external_declaration
%type <code> function_definition
%%
primary_expression
: IDENTIFIER {strcpy($$,);}
| CONSTANT {strcpy($$,);}
| '(' expression ')' {
char* temp = (char*)malloc((2 + strlen())*sizeof(char));
sprintf(temp,"(%s)",);
strcpy($$,temp);
free(temp);
}
| SIZEOF '(' type_specifier ')' {
char* temp = (char*)malloc((8 + strlen())*sizeof(char));
sprintf(temp,"sizeof(%s)",);
strcpy($$,temp);
free(temp);
}
| SIZEOF '(' IDENTIFIER ')' {
char* temp = (char*)malloc((8 + strlen())*sizeof(char));
sprintf(temp,"sizeof(%s)",);
strcpy($$,temp);
free(temp);
}
;
postfix_expression
: primary_expression {strcpy($$,);}
| postfix_expression '(' ')' {
char* temp = (char*)malloc((2 + strlen())*sizeof(char));
sprintf(temp,"%s()",);
strcpy($$,temp);
free(temp);
}
| postfix_expression '(' argument_expression_list ')' {
char* temp = (char*)malloc((2 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s(%s)",,);
strcpy($$,temp);
free(temp);
}
| postfix_expression PTR_OP IDENTIFIER {
char* temp = (char*)malloc((2 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s->%s",,);
strcpy($$,temp);
free(temp);
}
;
argument_expression_list
: expression {strcpy($$,);}
| argument_expression_list ',' expression {
char* temp = (char*)malloc((1 + strlen()+strlen())*sizeof(char));
sprintf(temp,"%s,%s",,);
strcpy($$,temp);
free(temp);
}
;
unary_expression
: postfix_expression {strcpy($$,);}
| unary_operator unary_expression {
char* temp = (char*)malloc((strlen() + strlen())*sizeof(char));
sprintf(temp,"%s%s",,);
strcpy($$,temp);
free(temp);}
;
unary_operator
: '&' {strcpy($$,"&");}
| '*' {strcpy($$,"*");}
| '-' {strcpy($$,"-");}
;
multiplicative_expression
: unary_expression {strcpy($$,);}
| multiplicative_expression '*' unary_expression {
char* temp = (char*)malloc((1 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s*%s",,);
strcpy($$,temp);
free(temp);
}
| multiplicative_expression '/' unary_expression {
char* temp = (char*)malloc((1 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s/%s",,);
strcpy($$,temp);
free(temp);
}
;
additive_expression
: multiplicative_expression {strcpy($$,);}
| additive_expression '+' multiplicative_expression {
char* temp = (char*)malloc((1 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s+%s",,);
strcpy($$,temp);
free(temp);
}
| additive_expression '-' multiplicative_expression {
char* temp = (char*)malloc((1 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s-%s",,);
strcpy($$,temp);
free(temp);
}
;
relational_expression
: additive_expression {strcpy($$,);}
| relational_expression LES_OP additive_expression {
char* temp = (char*)malloc((2 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s<=%s",,);
strcpy($$,temp);
free(temp);
}
| relational_expression GES_OP additive_expression {
char* temp = (char*)malloc((2 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s>=%s",,);
strcpy($$,temp);
free(temp);
}
| relational_expression LE_OP additive_expression {
char* temp = (char*)malloc((1 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s<%s",,);
strcpy($$,temp);
free(temp);
}
| relational_expression GE_OP additive_expression {
char* temp = (char*)malloc((1 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s>%s",,);
strcpy($$,temp);
free(temp);
}
;
equality_expression
: relational_expression {strcpy($$,);}
| equality_expression EQ_OP relational_expression {
char* temp = (char*)malloc((2 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s==%s",,);
strcpy($$,temp);
free(temp);
}
| equality_expression NE_OP relational_expression {
char* temp = (char*)malloc((2 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s!=%s",,);
strcpy($$,temp);
free(temp);
}
;
logical_and_expression
: equality_expression {strcpy($$,);}
| logical_and_expression AND_OP equality_expression {
char* temp = (char*)malloc((2 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s&&%s",,);
strcpy($$,temp);
free(temp);
}
;
logical_or_expression
: logical_and_expression {strcpy($$,);}
| logical_or_expression OR_OP logical_and_expression {
char* temp = (char*)malloc((2 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s||%s",,);
strcpy($$,temp);
free(temp);
}
;
expression
: logical_or_expression {strcpy($$,);}
| unary_expression '=' expression {
char* temp = (char*)malloc((1 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s=%s",,);
strcpy($$,temp);
free(temp);
}
;
declaration
: declaration_specifiers declarator ';' {
char* temp = (char*)malloc((1 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s%s;",,);
strcpy($$,temp);
free(temp);
}
| struct_specifier ';' {
char* temp = (char*)malloc((1 + strlen())*sizeof(char));
sprintf(temp,"%s;",);
strcpy($$,temp);
free(temp);
}
;
declaration_specifiers
: EXTERN type_specifier {
char* temp = (char*)malloc((strlen()+ strlen())*sizeof(char));
sprintf(temp,"%s %s",,);
strcpy($$,temp);
free(temp);
}
| type_specifier {strcpy($$,);}
;
type_specifier
: VOID {strcpy($$,);}
| INT {strcpy($$,);}
| struct_specifier {strcpy($$,);}
;
struct_specifier
: STRUCT IDENTIFIER '{' struct_declaration_list '}' {
char* temp = (char*)malloc((2 + strlen() + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s %s {%s}",,,);
strcpy($$,temp);
free(temp);
}
| STRUCT '{' struct_declaration_list '}' {
char* temp = (char*)malloc((2 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s {%s}",,);
strcpy($$,temp);
free(temp);
}
| STRUCT IDENTIFIER {
char* temp = (char*)malloc((strlen() + strlen())*sizeof(char));
sprintf(temp,"%s %s",,);
strcpy($$,temp);
free(temp);
}
;
struct_declaration_list
: struct_declaration {strcpy($$,);}
| struct_declaration_list struct_declaration {
char* temp = (char*)malloc((strlen() + strlen())*sizeof(char));
sprintf(temp,"%s %s",,);
strcpy($$,temp);
free(temp);
}
;
struct_declaration
: type_specifier declarator ';' {
char* temp = (char*)malloc((1 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s %s;",,);
strcpy($$,temp);
free(temp);
}
;
declarator
: '*' direct_declarator {
char* temp = (char*)malloc((1 + strlen())*sizeof(char));
sprintf(temp,"*%s",);
strcpy($$,temp);
free(temp);
}
| direct_declarator {strcpy($$,);}
;
direct_declarator
: IDENTIFIER {strcpy($$,);}
| '(' declarator ')' {
char* temp = (char*)malloc((2 + strlen())*sizeof(char));
sprintf(temp,"(%s)",);
strcpy($$,temp);
free(temp);
}
| direct_declarator '(' parameter_list ')' {
char* temp = (char*)malloc((2 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s(%s)",,);
strcpy($$,temp);
free(temp);
}
| direct_declarator '(' ')' {
char* temp = (char*)malloc((2 + strlen())*sizeof(char));
sprintf(temp,"%s()",);
strcpy($$,temp);
free(temp);
}
;
parameter_list
: parameter_declaration {strcpy($$,);}
| parameter_list ',' parameter_declaration {
char* temp = (char*)malloc((1 + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s, %s",,);
strcpy($$,temp);
free(temp);
}
;
parameter_declaration
: declaration_specifiers declarator {
char* temp = (char*)malloc((strlen() + strlen())*sizeof(char));
sprintf(temp,"%s %s",,);
strcpy($$,temp);
free(temp);
}
;
statement
: compound_statement {strcpy($$,);}
| expression_statement {strcpy($$,);}
| selection_statement {strcpy($$,);}
| iteration_statement {strcpy($$,);}
| jump_statement {strcpy($$,);}
;
compound_statement
: '{' '}' {strcpy($$,"{}");}
| '{' statement_list '}' {
char* temp = (char*)malloc((2 + strlen())*sizeof(char));
sprintf(temp,"{%s}",);
strcpy($$,temp);
free(temp);
}
| '{' declaration_list '}' {
char* temp = (char*)malloc((2 + strlen())*sizeof(char));
sprintf(temp,"{%s}",);
strcpy($$,temp);
free(temp);
}
| '{' declaration_list statement_list '}' {
char* temp = (char*)malloc((2 + strlen() + strlen())*sizeof(char));
sprintf(temp,"{%s%s}",,);
strcpy($$,temp);
free(temp);
}
;
declaration_list
: declaration {strcpy($$,);}
| declaration_list declaration {
char* temp = (char*)malloc((strlen() + strlen())*sizeof(char));
sprintf(temp,"%s %s",,);
strcpy($$,temp);
free(temp);
}
;
statement_list
: statement {strcpy($$,);}
| statement_list statement {
char* temp = (char*)malloc((strlen() + strlen())*sizeof(char));
sprintf(temp,"%s %s",,);
strcpy($$,temp);
free(temp);
}
;
expression_statement
: ';' {strcpy($$,";");}
| expression ';' {
char* temp = (char*)malloc((1 + strlen())*sizeof(char));
sprintf(temp,"%s;",);
strcpy($$,temp);
free(temp);
}
;
selection_statement
: IF '(' expression ')' statement {
char* temp = (char*)malloc((2 + strlen() + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s (%s) %s",,,);
strcpy($$,temp);
free(temp);
}
| IF '(' expression ')' statement ELSE statement {
char* temp = (char*)malloc((2 + strlen() + strlen() + strlen() + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s (%s) %s %s %s",,,,,);
strcpy($$,temp);
free(temp);
}
;
iteration_statement
: WHILE '(' expression ')' statement {
char* temp = (char*)malloc((1 + strlen() + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s (%s) %s",,,);
strcpy($$,temp);
free(temp);
}
| FOR '(' expression_statement expression_statement expression ')' statement {
char* temp = (char*)malloc((2 + strlen() + strlen() + strlen() + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s (%s %s %s) %s",,,,,);
strcpy($$,temp);
free(temp);
}
;
jump_statement
: RETURN ';' {strcpy($$,);}
| RETURN expression ';' {
char* temp = (char*)malloc((strlen() + strlen())*sizeof(char));
sprintf(temp,"%s %s;",,);
strcpy($$,temp);
}
;
program
: external_declaration {strcpy($$,);}
| program external_declaration {
char* temp = (char*)malloc((strlen() + strlen())*sizeof(char));
sprintf(temp,"%s %s",,);
strcpy($$,temp);
free(temp);
}
;
external_declaration
: function_definition {strcpy($$,);}
| declaration {strcpy($$,);}
;
function_definition
: declaration_specifiers declarator compound_statement {
char* temp = (char*)malloc((strlen() + strlen() + strlen())*sizeof(char));
sprintf(temp,"%s %s %s",,,);
strcpy($$,temp);
free(temp);
}
;
%%
int yyerror(char *s) {
printf("%s\n",s);
printf("%s",yytext);
}
int main(int argc, char *argv[]) {
yyin = fopen(argv[1],"r");
yyparse();
return 0;
}
这个解析器的最终目的是生成一个带有初始代码的 3 地址代码(接近于旧的 C 语言)。
现在,我只是创建一个简单的解析器来测试输入文件是否以我的第一语言正确编写。
在我的实际代码中,这里的语义操作是为了构建与输入相同的代码,然后将其写入外部文件。
这对你来说可能看起来完全是白痴,但这是我的第一个编译器项目,我认为如果我想在未来生成代码(因为我只想要代码翻译),这对我来说是至关重要的一步。
这就是问题所在。在执行我的语义动作之前,解析器可以完美地解析测试文件,没有语法错误。但是现在,它不再起作用了。
语义动作不影响语法分析,我们确定吗?
PS: 这是我的 lex 文件:
chiffre [0-9]
lettre [a-zA-Z]
exposant [Ee][+-]?{D}+
commentaire [\/][\*][^\/\*]*[\*][\/]
identificateur {lettre}({lettre}|_|{chiffre})*
entier {chiffre}+
pointeur [-][>]
%{
#include <stdio.h>
#include "y.tab.h"
/* Ne gère pas les commentaires. A rajouter */
/* Supprimer les lignes dont vous n'avez pas besoin. */
%}
%%
"else" {yylval.code=yytext;return ELSE;}
"extern" {yylval.code=yytext;return EXTERN;}
"for" {yylval.code=yytext;return FOR;}
"if" {yylval.code=yytext;return IF;}
"int" {yylval.code=yytext;return INT;}
"return" {yylval.code=yytext;return RETURN;}
"sizeof" {yylval.code=yytext;return SIZEOF;}
"struct" {yylval.code=yytext;return STRUCT;}
"void" {yylval.code=yytext;return VOID;}
"while" {yylval.code=yytext;return WHILE;}
{entier} {yylval.code=yytext;return CONSTANT; }
{identificateur} {yylval.code=yytext;return IDENTIFIER;}
"<" {yylval.code=yytext;return LE_OP;}
">" {yylval.code=yytext;return GE_OP;}
"<=" {yylval.code=yytext;return LES_OP;}
">=" {yylval.code=yytext;return GES_OP;}
"==" {yylval.code=yytext;return EQ_OP;}
"!=" {yylval.code=yytext;return NE_OP;}
"&&" {yylval.code=yytext;return AND_OP;}
"||" {yylval.code=yytext;return OR_OP;}
"*" {yylval.code=yytext;return '*';}
"-" {yylval.code=yytext;return '-';}
"&" {yylval.code=yytext;return '&';}
{pointeur} {yylval.code=yytext;return PTR_OP;}
"+" {yylval.code=yytext;return '+';}
"/" {yylval.code=yytext;return '/';}
{commentaire} {/* commentaire ne rien faire */}
"," {yylval.code=yytext;return ',';}
";" {yylval.code=yytext;return ';';}
"(" {yylval.code=yytext;return '(';}
")" {yylval.code=yytext;return ')';}
"{" {yylval.code=yytext;return '{';}
"}" {yylval.code=yytext;return '}';}
"=" {yylval.code=yytext;return '=';}
[ \t\n\v\r]+ { /* rien faire */ }
. { printf("erreur : b");printf("%s",yytext);printf("a\n");printf("Erreur lexicale. \n"); }
%%
最后是我的测试文件之一
extern int printd( int i );
int main() {
int i;
int j;
i = 45000;
j = -123;
printd(i+j);
printd(45000+j);
printd(i+123);
printd(45000+123);
printd(i+(j+0));
printd((i+0)+j);
printd((i+0)+(j+0));
printd((i+0)+123);
printd(45000+(j+0));
return 0;
}
进行这些更改后,解析器会在第一个 "int" 关键字处阻塞。
感谢您的帮助...
您的字符串处理需要改进。
这是永远不正确的:
yylval.code=yytext;
yytext
指向扫描仪用来保存部分输入的内部临时缓冲区,其内容会发生变化。它甚至可能被重新分配。
如果您需要保留与令牌对应的字符串,则需要制作一个动态分配的副本(并在不再需要时释放该副本)。对于关键字和运算符标记,这可能被认为是不必要的开销;您可以只使用令牌号作为字符串文字查找 table 中的键,或其他一些复制避免机制。
此外,您在解析器中的操作包括如下调用:
strcpy($$, );
但是 strcpy
要求目标是指向已知至少与源一样长的有效字符数组的指针。由于您没有初始化 $$
,这可能被认为是不合理的期望。恰好,$$
被预初始化为$$
,所以上面相当于strcpy(, )
,这是一个错误,不是空操作:strcpy
要求源和目的地不重叠。
一个特别恶劣的例子发生在 declaration_specifiers 的行动中:
char* temp = (char*)malloc((strlen()+ strlen())*sizeof(char));
sprintf(temp,"%s %s",,);
strcpy($$,temp);
free(temp);
首先,根据关于在您的扫描程序操作中使用 yytext
的观点,</code> 和 <code>
目前不太可能包含有用的信息。其次,$$
与 </code> 相同,因此它指向扫描器的内部缓冲区。这意味着 <code>strcpy
将用随机文本覆盖扫描器的内部缓冲区,可能会超出它。没有什么好处。
顺便说一下,用于创建 temp
的 malloc
的长度计算有误。应该是:
char* temp = malloc(strlen() + strlen() + 2);
因为 space 字符和 NUL 终止符。所以这是另一个缓冲区溢出。 (并且 temp
是不必要的;您应该直接使用 $$
来保存新分配的字符串的地址。)