flex 如何识别子字符串?

How does flex recognize substrings?

我刚接触 flex,正在做 C 语言的词法分析。我想输出所有关键字、标识符、字面量、运算符和分隔符。这是我的程序 lexer.l,但它不能正常工作。

%{
/* C declarations copied verbatim into the generated scanner. */
#include<stdio.h>
/* Line number printed in front of every token; incremented by the \n rule. */
int currentLine=1;
%}
%%
	/* Each action prints "<line>\t<text,category>" for the matched token. */
	/* Preprocessor lines: a fixed label is printed instead of yytext. */
#include<.*> printf("%d\t<%s,%s>\n",currentLine,"include","PreProcessor");
#define[^\n]+  printf("%d\t<%s,%s>\n",currentLine,"define","PreProcessor");
= {printf("%d\t<%s,%s>\n",currentLine,yytext,"AssignmentOperator");}
int|short|signed|unsigned|long|double|float|char|void|enum|union|struct|auto|const|register|static|volatile|extern|typedef|if|else|while|do|for|switch|case|continue|break|default|sizeof|goto|return   {printf("%d\t<%s,%s>\n",currentLine,yytext,"Keyword");}
	/* Blanks and tabs are discarded; a newline only advances the line counter. */
[\t ]   ;
\n currentLine++;
(\"[^\"]*\")    {printf("%d\t<%s,%s>\n",currentLine,yytext,"String Literal");}
\( printf("%d\t<%s,%s>\n",currentLine,yytext,"parenthesisOpen");
\) printf("%d\t<%s,%s>\n",currentLine,yytext,"parenthesisClose");
\{ printf("%d\t<%s,%s>\n",currentLine,yytext,"blockOpen");
\} printf("%d\t<%s,%s>\n",currentLine,yytext,"blockClose");
	/* NOTE(review): "<=" is reported as ArithmeticOperator by this rule. */
"+"|"-"|"/"|"*"|"<="    {printf("%d\t<%s,%s>\n",currentLine,yytext,"ArithmeticOperator");}
(\&\&)|(\|\|)|! printf("%d\t<%s,%s>\n",currentLine,yytext,"LogicalOperator");
&|\||~ printf("%d\t<%s,%s>\n",currentLine,yytext,"BitwiseOperator");
	/* NOTE(review): the class below has no repetition operator, so this
	   pattern matches the two slashes plus exactly one more character. */
\/\/[^\n] printf("%d\t<%s,%s>\n",currentLine,yytext,"SingleLineComment");
(\/\*.*\*\/)    printf("%d\t<%s,%s>\n",currentLine,yytext,"MultiLineComment");
;   printf("%d\t<%s,%s>\n",currentLine,yytext,"Separator");
	/* Catch-all. NOTE(review): dot-star matches everything up to the end of
	   the line, and flex prefers the longest match, so this rule beats every
	   token rule above unless that token itself reaches the end of the line. */
.* printf("%s\tany match\n",yytext);
%%

/*
 * End-of-input hook. Per the flex manual the scanner calls this when it runs
 * out of input; a nonzero result means there is nothing more to scan.
 */
int yywrap()
{
    enum { NO_MORE_INPUT = 1 };

    return NO_MORE_INPUT;
}

/*
 * Entry point of the lexer.l scanner.
 *
 * Expects exactly one command-line argument, the path of the file to scan.
 * Returns 1 when the argument count is wrong, 2 when the file cannot be
 * opened, and 0 after the whole input has been scanned.
 */
int main(int argc, char *argv[])
{
    enum { EXPECTED_ARGC = 2 };

    if (argc != EXPECTED_ARGC) {
        printf("Invalid arguments !\n Usage: lexgen <filename>\n");
        return 1;
    }

    /* Point the scanner's input stream at the requested file. */
    yyin = fopen(argv[1], "r");
    if (!yyin) {
        printf("File not found !\n");
        return 2;
    }

    /* Banner and column header, then tokenize until end of input. */
    printf("Lexical Analyser for C :-\n");
    printf("Line\tToken\n");
    yylex();

    fclose(yyin);
    return 0;
}

输入文件:

#include<stdio.h>
#define PI 3.14
int a=5;
double
< + - *
<= >= ! ~
"hskldjh";

这是另一个可以正常工作的程序 tmp.l(它能正确处理 int a=5;,而 lexer.l 却没有把这一行拆分成记号):

%{
#include<stdio.h>
#include<string.h>
/* err/elno: message and line number of each recorded lexical error (ecnt used);
   name: table of distinct identifiers filled by st_add (cnt used). */
char err[20][50],name[20][20];
/* lno: current input line, advanced by the newline rules below. */
int lno=1,cnt=0,ecnt=0,elno[20];
void st_add(char *);
%}

%%
	/* Each token action prints "<line> <lexeme> <category>". */
[0-9]+   {printf("%d %s Number\n",lno,yytext);}
	/* NOTE(review): inside the brackets "+-/" is a character range, so this
	   class also matches ',' and '.' in addition to the four operators. */
[+-/*]   {printf("%d %s Operator\n",lno,yytext);}
=   {printf("%d %s Assignment\n",lno,yytext);}
main|return|include|if|else|switch|cin|cout|using|namespace|std {printf("%d %s Keyword\n",lno,yytext);}
int|double|char|float {printf("%d %s Data type\n",lno,yytext);}
[\t ]      ;
\n   {lno++;}
	/* Complete comments are discarded; an opening comment with no
	   terminator is recorded as an error. */
(\/\/.*) ;
(\/\*[^*/]*\*\/) ;
(\/\*[^*/]*)  {elno[ecnt]=lno;char str[100]="Unterminated comment";strcpy(err[ecnt],str);ecnt++;}
printf|scanf  {printf("%d %s Library function\n",lno,yytext);}
	/* Identifiers are also entered into the symbol table. */
[a-z]+[a-zA-Z0-9]* {printf("%d %s Identifier\n",lno,yytext);st_add(yytext);}
([a-zA-Z0-9]+\.h) {printf("%d %s Header\n",lno,yytext);}
\(   {printf("%d %s Open bracket\n",lno,yytext);}
\)   {printf("%d %s Close bracket\n",lno,yytext);}
\<<   {printf("%d %s insertion\n",lno,yytext);}
\>>   {printf("%d %s extraction\n",lno,yytext);}
\{   {printf("%d %s Block start\n",lno,yytext);}
\}   {printf("%d %s Block end\n",lno,yytext);}
#   {printf("%d %s Preprocessor\n",lno,yytext);}
;   {printf("%d %s Terminator\n",lno,yytext);}
(\"[^\"]*\")  {printf("%d %s String literal\n",lno,yytext);} 
	/* A quote that runs into a newline is an error; the consumed newline
	   is counted here because the \n rule never sees it. */
(\"[^\"\n]*\n)  {elno[ecnt]=lno;char str[100]="Unterminated quote";strcpy(err[ecnt],str);ecnt++;lno++;}

	/* NOTE(review): the class is written a-zA-z (lower-case final z), which
	   in ASCII spans more than the letters. */
[0-9]+[a-zA-z]*  {elno[ecnt]=lno;char str[100]="Unrecognized token";strcpy(err[ecnt],str);ecnt++;}  
%%
/*
 * Add identifier `s` to the symbol table `name` if it is not already present.
 *
 * The table has a fixed number of fixed-width slots. An identifier longer
 * than a slot is truncated to fit, and lookups use the truncated form so the
 * same long identifier is not stored twice. When the table is full the
 * identifier is dropped rather than written past the end of the array.
 */
void st_add(char s[20])
{
    const size_t capacity = sizeof name / sizeof name[0];
    char key[sizeof name[0]];
    int i;

    /* snprintf always NUL-terminates and never writes past `key`. */
    snprintf(key, sizeof key, "%s", s);

    for (i = 0; i < cnt; i++) {
        if (strcmp(name[i], key) == 0) {
            return;
        }
    }

    if ((size_t)cnt >= capacity) {
        return; /* table full */
    }

    strcpy(name[cnt], key); /* safe: key is no wider than one slot */
    cnt++;
}

/*
 * Entry point of the tmp.l scanner.
 *
 * Prompts for a file name, scans that file, then prints the recorded lexical
 * errors and the symbol table. Returns 1 if no file name could be read or the
 * file cannot be opened, 0 otherwise.
 */
int main(void)
{
    char file[20];
    int i;

    printf("Enter file name:");
    /* The field width keeps the input inside `file` (19 chars + NUL). */
    if (scanf("%19s", file) != 1) {
        fprintf(stderr, "No file name given\n");
        return 1;
    }

    yyin = fopen(file, "r");
    if (yyin == NULL) {
        /* Report the failure instead of scanning with a NULL yyin. */
        perror(file);
        return 1;
    }

    printf("Line No. Lexeme  Token\n");
    yylex();
    fclose(yyin);

    printf("Number of errors: %d\n", ecnt);
    for (i = 0; i < ecnt; i++) {
        printf("Line no.: %2d  %s\n", elno[i], err[i]);
    }

    printf("\nSymbol Table\n");
    for (i = 0; i < cnt; i++) {
        printf("%s\n", name[i]);
    }
    return 0;
}

/*
 * End-of-input hook. Per the flex manual the scanner calls this when it runs
 * out of input; a nonzero result means there is nothing more to scan.
 */
int yywrap()
{
    const int no_more_input = 1;

    return no_more_input;
}

tmp.l 输出:

Line No. Lexeme  Token
1 # Preprocessor
1 include Keyword
<1 stdio.h Header
>2 # Preprocessor
2 define Identifier
PI2 3 Number
2 . Operator
2 14 Number
3 int Data type
3 a Identifier
3 = Assignment
3 5 Number
3 ; Terminator
4 double Data type
<5 + Operator
5 - Operator
5 * Operator
<6 = Assignment
>6 = Assignment
!~7 "hskldjh" String literal
7 ; Terminator
Number of errors: 0

Symbol Table
define
a

lexer.l 输出:

Lexical Analyser for C :-
Line    Token
1   <include,PreProcessor>
2   <define,PreProcessor>
int a=5;    any match
4   <double,Keyword>
< + - * any match
<= >= ! ~   any match
"hskldjh";  any match

lexer.l 甚至匹配不到关系运算符和算术运算符的正则表达式。如果输入只有 int,它会显示 <int,Keyword>;但如果输入是 int a=5;,它就识别不出其中的任何记号,而 tmp.l 却工作得很好!我应该如何在 flex 中编写规则?规则有特定的顺序要求吗?

(F)lex 总是选用匹配最长的规则;只有当多条规则的匹配长度相同时,才会选用最先列出的那条。规则 .* 会一直匹配到行尾,因此它的匹配会比其他任何规则都长,除非该记号本身就延伸到行尾。把这条兜底规则改成只匹配单个字符的 . 即可避免这个问题。