lex/flex对C/C++嵌套`#include "Header"`语法的实现的解释?

Explanation for implementation of C/C++ nested `#include "Header"` grammar by lex/flex?

我正在研究lex/flex中的起始状态和嵌套输入文件

在《flex & bison》一书中,我对其中实现 C/C++ `#include "Header"` 语法的示例感到困惑:

这是示例 lex 的一部分:

/* Companion source code for "flex & bison", published by O'Reilly
 * Media, ISBN 978-0-596-15597-1
 * Copyright (c) 2009, Taughannock Networks. All rights reserved.
 * See the README file for license conditions and contact info.
 * $Header: /home/johnl/flnb/code/RCS/fb2-3.l,v 2.3 2010/01/04 02:43:58 johnl Exp $
 */

/* fb2-3 skeleton for include files */

/* noyywrap: the scanner does not call yywrap() at end of input.
 * nodefault: the implicit echo-unmatched-input rule is suppressed, so the
 * rules below have to cover every input character themselves.
 * warn: keeps flex diagnostics enabled (see the flex manual). */
%option noyywrap warn nodefault
/* IFILE is an exclusive start state: while it is active only rules
 * tagged with IFILE can match.  It is entered after the opening delimiter
 * of an #include line and left once the file name has been handled. */
%x IFILE
  /* One entry per open input file.  Entries are linked through prev into
   * a stack; curbs points at the entry for the file being scanned and is
   * 0 until newfile() pushes the first one. */
  struct bufstack {
    struct bufstack *prev;  /* entry of the file that included this one, or 0 */
    YY_BUFFER_STATE bs;     /* flex buffer reading from f */
    int lineno;         /* yylineno saved when a nested file suspends this one */
    char *filename;     /* name this file was opened under */
    FILE *f;            /* stdio stream behind the buffer */
  } *curbs = 0;

  char *curfilename;        /* name of current input file */

  /* newfile: open fn and make it the current input; 1 on success, 0 if it
   * cannot be opened.
   * popfile: close the current input and resume the enclosing one; 1 if
   * an enclosing file was resumed, 0 if none is left. */
  int newfile(char *fn);
  int popfile(void);

%%
^"#"[ \t]*include[ \t]*[\"<] { /* matched #include and its opening delimiter; the file name comes next */ BEGIN IFILE; }

<IFILE>[^ \t\n\">]+          { 
                             /* yytext holds the file name only: the pattern stops
                              * before whitespace, a double quote or a closing
                              * angle bracket, so the closing delimiter is still
                              * unread at this point. */
                             { int c;
                   /* discard the rest of the include line, closing delimiter
                    * first; the loop ends after reading a newline or when
                    * input() returns 0 */
                   while((c = input()) && c != '\n') ;
                 }
                 yylineno++;   /* count the include line; newfile saves this value for the resume */
                 if(!newfile(yytext))
                                yyterminate(); /* no such file */
                 BEGIN INITIAL;   /* the included file is scanned with the ordinary rules */
                           }

<IFILE>.|\n                { fprintf(stderr, "%4d bad include line\n", yylineno);
                     /* reached when whitespace, a quote, a closing bracket or a
                      * newline appears where the file name was expected */
                     yyterminate();
               }
^.                         { fprintf(yyout, "%4d %s", yylineno, yytext); /* first character of a line: emit the line number before it */ }
^\n                        { fprintf(yyout, "%4d %s", yylineno++, yytext); /* empty line: number it, then advance the counter */ }
\n                         { ECHO; yylineno++; /* newline ending a non-empty line */ }
.                          { ECHO; /* every other character is copied through */ }
<<EOF>>                    { if(!popfile()) { fprintf(yyout, "end of file, total lines: %4d %s", yylineno, yytext); yyterminate();}  /* popfile returns 0 once no enclosing file is left; otherwise scanning resumes in the including file. NOTE(review): yytext is printed after popfile has deleted the current buffer - confirm it is still valid here */ }
%%

/*
 * Entry point: scan the file named by argv[1], following nested
 * #include lines, and write the line-numbered text to yyout.
 *
 * Returns 0 after scanning, 1 if no file name was given or the file
 * could not be opened (newfile has already reported the reason).
 */
int
main(int argc, char **argv)
{
  if(argc < 2) {
    fprintf(stderr, "need filename\n");
    return 1;
  }

  /* newfile returns 0 when the file cannot be opened */
  if(!newfile(argv[1]))
    return 1;

  yylex();
  return 0;
}

/*
 * newfile - open fn and make it the current scanner input.
 *
 * Pushes a new bufstack entry on top of curbs, saving the line number of
 * the file being suspended, then switches flex to a fresh buffer reading
 * from the new file and restarts yylineno at 1.
 *
 * fn is copied: the scanner passes yytext here, and that storage is only
 * valid until the next token is matched, so keeping the caller's pointer
 * in the entry would leave filename/curfilename dangling.
 *
 * Returns 1 on success, 0 if the file cannot be opened (reported with
 * perror).  Exits the process if memory runs out.
 */
int
  newfile(char *fn)
{
  FILE *f = fopen(fn, "r");
  struct bufstack *bs;
  char *name;
  size_t len;

  /* check the open first so that nothing is allocated on failure */
  if(!f) { perror(fn); return 0; }

  bs = malloc(sizeof *bs);
  if(!bs) { perror("malloc"); exit(1); }

  /* private copy of the name, taken before the buffer switch below */
  len = strlen(fn) + 1;
  name = malloc(len);
  if(!name) { perror("malloc"); exit(1); }
  memcpy(name, fn, len);

  /* remember state */
  if(curbs) curbs->lineno = yylineno;
  bs->prev = curbs;

  /* set up current entry */
  bs->bs = yy_create_buffer(f, YY_BUF_SIZE);
  bs->f = f;
  bs->filename = name;
  yy_switch_to_buffer(bs->bs);
  curbs = bs;
  yylineno = 1;
  curfilename = name;
  return 1;
}

/*
 * popfile - finish the current input file and resume the one below it.
 *
 * Closes the stream, deletes the flex buffer and frees the entry together
 * with its copy of the file name.  curbs is always moved to the previous
 * entry, so it never points at freed memory, not even after the last
 * file has been popped.
 *
 * Returns 1 if an enclosing file was resumed (yylineno and curfilename
 * restored from its entry), 0 if the stack was already empty or has just
 * become empty.
 */
int
  popfile(void)
{
  struct bufstack *bs = curbs;
  struct bufstack *prevbs;

  if(!bs) return 0;

  /* get rid of current entry */
  fclose(bs->f);
  yy_delete_buffer(bs->bs);
  prevbs = bs->prev;
  free(bs->filename);
  free(bs);

  /* unlink before the early return so a later call sees an empty stack */
  curbs = prevbs;
  if(!prevbs) {
    curfilename = NULL;   /* the name it pointed at was just freed */
    return 0;
  }

  /* switch back to previous */
  yy_switch_to_buffer(prevbs->bs);
  yylineno = prevbs->lineno;
  curfilename = prevbs->filename;
  return 1;
}

请帮我解决这些问题:

  1. 为什么 <IFILE>[^ \t\n\">]+ 能匹配 Header 末尾的 " 或 >?
  2. 为什么要用 { int c; while((c = input()) && c != '\n') ; } 吃掉直到行尾 \n 的所有字符?yytext 会恰好匹配 Header 文件名吗?
  3. 如何实现像 java import java.util.Decoder; 这样的语法?

Why [^ \t\n\">]+ matches the end " or > of the Header?

答案是:它并不会匹配。

它所做的是匹配所有字符,直到遇到 " 或 >(以及空格、制表符和换行符)为止;一旦遇到这些字符,匹配就停止。因此,当匹配成功并执行该规则的代码时,可以确定输入中紧跟在匹配内容之后的下一个字符必定是 "、> 或某个空白字符。

举个例子:

#include <foo/bar.h>
  • 规则 ^"#"[ \t]*include[ \t]*[\"<] 匹配 #include <
  • 规则 <IFILE>[^ \t\n\">]+ 匹配 foo/bar.h

当你运行代码

int c;
while((c = input()) && c != '\n') ;

它会先读取结尾的 >,然后继续读取并丢弃该行剩余的所有字符,直到读到行尾的换行符为止(换行符本身也被读掉)。

要验证这一点,您可以在循环中添加一些字符输出。