我正在编写一个 lex 代码，其中的正则表达式部分与详细部分中的完全一样。我的主要问题是文本的 RE

Question

    %{
    #define  FUNCT      300
    #define  IDENTIFIER 301
    #define  ASSGN      302
    #define  INTEGER    303
    #define  PRINT      304
    #define  TEXT       305
    #define  INPUT      306
    #define  CONTINUE   307
    #define  RETURN     308
    #define  IF         309
    #define  THEN       310
    #define  ENDIF      311
    #define  ELSE       312
    #define  WHILE      313
    #define  DO         314
    #define  ENDDO      315
    #define  END        316
    
    #include<stdio.h>
    #include<string.h>
    #include<stdlib.h>
    
    #define MAX_SYM 200
    int found;
    void initialize();   
    void create(char *lexeme, int scope, char type, char usage);
    int readsymtab(char *lexeme, int scope, char usage); 
    %}
    
    %%
    [\t ]+                {}
    =                     {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(ASSGN)                            ;}
    print                 {int found = readsymtab(yytext,0,'L');   //line 39
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(PRINT)                            ;}
    input                 {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(INPUT)                            ;}
    continue              {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(CONTINUE)                         ;}
    return                {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(RETURN)                           ;}
    if                    {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(IF)                               ;}
    then                  {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(THEN)                             ;}
    endif                 {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(ENDIF)                            ;}
    else                  {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(ELSE)                             ;}
    while                 {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(WHILE)                            ;}
    do                    {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(DO)                               ;}
    enddo                 {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(ENDDO)                            ;}
    end                   {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(END);
                           exit(0);                                 ;}
    funct                 {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(FUNCT)                            ;}
    [0-9]+                {int found = readsymtab(yytext,0,'L');
                           if(found == -1)
                           {
                            create(yytext,0,'I','L');
                           };
                           return(FUNCT)                            ;}
    [a-zA-Z]+             {int found = readsymtab(yytext,0,'I');
                           if(found == -1)
                           {
                            create(yytext,0,'S','I');
                           };
                           return(IDENTIFIER)                       ;}
    \"[^\"\n]+|[\n]+\"   {int found = readsymtab(yytext,0,'L');  //line130
                           if(found == -1)
                           {
                            create(yytext,0,'S','L');
                           };
                           return(TEXT)                             ;}
    .                     {return(yytext[0])                        ;}
    %%
    
    
    
    //new variable declaration
    
    int num;
    int scope;
    struct symbtab                    
    {
        char Lexeme [18];
        int Scope;
        char Type;
        char Usage;
        int Reference;
    };
    struct symbtab arr_symtab[200];                                //data structure in which the symbol table entries are stored
    
    void print_fn()                                                //function which actually prints the symbol tabel in columnar form             
    {
        int rows;
        
        printf("Row No Lexeme           Scope Type Usage Reference\n");
    
        for (rows=0; rows<=num; rows++){
            printf("%6d %-16s %-7d %-7c %-7c %-7d \n",rows, arr_symtab[rows].Lexeme,arr_symtab[rows].Scope,arr_symtab[rows].Type,arr_symtab[rows].Usage,arr_symtab[rows].Reference);
        }
    }
    
    void initialize()                                              //function which enteres the initial value into the symbol table              
    {
        num = -1;
        int scope = 0;
        char lexeme[18]= "FRED";
        char type = 'I';
        char usage = 'L';
        create(lexeme,scope,type,usage);   
    }
    
    void create(char *lexeme, int scope, char type, char usage)    //function which creates a new entry in the symbol table                                                                     
    {
        
        int reference;
        if(type=='I' && usage =='L')
             reference = atoi(lexeme);
        else
             reference = -1;
    
        num = num+1;
        strcpy(arr_symtab[num].Lexeme, lexeme); 
        arr_symtab[num].Scope = scope;
        arr_symtab[num].Type = type;
        arr_symtab[num].Usage = usage;
        arr_symtab[num].Reference = reference;
        
    }
    
    int readsymtab(char *lexeme, int scope, char usage)                 //function which checks if the entry is already in the table or not and the takes the required action                                                              
    {
        for(int i=num; i>=0; i--){
            int comp = strcmp(arr_symtab[i].Lexeme, lexeme);
           if(comp==0 && arr_symtab[i].Scope==scope && arr_symtab[i].Usage==usage)
           {
               return i;
           }
           else
           {
               return -1;
           }
        }
    }
    
    int main()
    {
        //other lines
        printf("\n COURSE: CSCI50200 NAME: Aryan Banyal NN: 01 Assignment #: 04 \n");
        initialize();
        yylex();
        print_fn();
        printf("End of test.\n");
        return 0;
    }
    
    int yywrap ()
    {
        return 1;
    }

以下是打印“aryan banyal”的输出

     COURSE: CSCI50200 NAME: Aryan Banyal NN: 01 Assignment #: 04 
    --(end of buffer or a NUL)
    --accepting rule at line 39 ("print")
    Row No Lexeme           Scope Type Usage Reference
         0 FRED             0       I       L       0       
         1 print            0       S       L       -1      
    End of test.

如您所见，它甚至没有转到“aryan banyal”部分，只是打印并退出... 以下是“aryan banyal”

的输出

     COURSE: CSCI50200 NAME: Aryan Banyal NN: 01 Assignment #: 04 
    --(end of buffer or a NUL)
    --accepting rule at line 130 (""aryan banyal")
    Row No Lexeme           Scope Type Usage Reference
         0 FRED             0       I       L       0       
         1 "aryan banyal    0       S       L       -1      
    End of test.

第 1 行应该是 aryan banyal，但由于某种原因前面有一个 "。

Answer 1

您有（至少）三个（有些）不相关的问题。

使用词法扫描器

您的代码在读取单个标记后停止，因为您只调用了 yylex() 一次（并忽略了它 return 的内容）。 yylex() return 每次调用时都是一个标记；如果要扫描整个文件，则需要在循环中调用它。遇到输入结束会return0。

理解模式

模式\"[^\"\n]+|[\n]+\"中间有一个|；该运算符匹配它周围的任一模式。所以你正在匹配 \"[^\"\n]+ 或 [\n]+\"。第一个匹配单个双引号，后跟任意数量的字符（但至少一个），不能是引号或换行符。这样匹配 "aryan banyal 没有结束引号但包括开始引号。备选方案的后半部分将匹配任意数量的字符（同样，至少一个），所有这些字符要么是反斜杠，要么是字母 n，然后是一个双引号。

（我不明白这个模式背后的想法，它几乎肯定不是你想要的。如果你在 "aryan banyal 的比赛后再次调用 yylex，结束报价会not 已被匹配，因为它将是紧接的下一个字符，并且该模式坚持认为它前面至少有一个反斜杠或 n。（也许你打算这样做是一个换行符，但也没有一个。）

我想您可能想要匹配整个带引号的字符串，然后只保留引号之间的部分。如果您正确编写了模式，那就是它会匹配的内容，然后您需要删除双引号。我将把编写正确的模式作为练习。您可能想阅读 Flex 手册中的简短 description of Flex patterns；您的 class 笔记中可能也有一些信息。

只选择匹配的一部分

删除令牌开头的引号很容易。所需要的只是在 yytext 上加一。要去掉末尾的那个，您需要用 [=22=] 覆盖它，从而提前一个字符终止字符串。这很容易做到，因为 Flex 在变量 yyleng 中为您提供了匹配的长度。所以你可以设置 yytext[yyleng - 1] = '[=24=]' 然后用 yytext + 1.

调用你的符号 table 函数

如果以上段落没有意义，您应该复习任何关于 C 中字符串处理的介绍性文本。请记住，在 C 中，字符串不过是一个以 0 结尾的单个字符（小整数）的数组。这使得一些事情很容易做，而另一些事情则有点痛苦（但从不神秘）。

我正在编写一个 lex 代码，其中的正则表达式部分与详细部分中的完全一样。我的主要问题是文本的 RE

I am writing a lex code in which the regular expression section is given exactly like in the detailed section. Main problem I have is the RE for text

c

compiler-construction

lex

使用词法扫描器

理解模式

只选择匹配的一部分