为什么 Bison (Yacc) 会看似无缘无故地输出换行(空行)?

Why does Bison (Yacc) print new lines for apparently no reason?

我正在尝试使用 Flex 和 Bison 为一种简单的格式化语言构建一个编译器。我还处于起步阶段,我已经编写了一些功能。

在目前这个阶段,我还没有在任何地方向 yyout 输出任何内容。只有在某些错误情况下才会向输出文件写入内容,但对于下面这个输入,显然不会触发这些情况。我的所有其他打印语句都输出到控制台。所以,我希望输出文件完全是空的。但是,当我使用以下内容作为输入文件时:

\begin {document}

\tabsize( 5)
\title{"Why I Love Compiler Design"}
\author{"COMP421 Student"}
\date{29/12/2016}
\pagesetup{30,100 }

\end{document}

生成的输出文件是:

有 9 行空行,对应于我在输入文件中的 9 行。然而,我期望的输出只有 1 个空行。

这是我的 .l 文件:

%{
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include "y.tab.h"
    void yyerror(const char *);
    int yylex(void);
    /* "Connect" with the output file  */
    extern FILE *yyout;
    extern int  yyparse();
%}

/* Allows printing the line number (of an error) */
%option yylineno

%%

^\\ { printf("LEX returned token BSLASH\n"); return BSLASH; } /* one literal backslash; ^ means it is only recognised at the start of a line */
\{  { printf("LEX returned token LBRACE\n"); return LBRACE; } /* {, }, ( and ) are escaped because they are pattern metacharacters */
\}  { printf("LEX returned token RBRACE\n"); return RBRACE; }
\(  { printf("LEX returned token LPAREN\n"); return LPAREN; }
\)  { printf("LEX returned token RPAREN\n"); return RPAREN; }
,   { printf("LEX returned token COMMA\n"); return COMMA; }

begin    { printf("LEX returned token BEGIN_\n"); return BEGIN_; } /* keywords are plain literals and need no escaping */
end      { printf("LEX returned token END\n"); return END; }
document { printf("LEX returned token DOCUMENT\n"); return DOCUMENT; }

pagesetup { printf("LEX returned token PAGESETUP\n"); return PAGESETUP; }
tabsize   { printf("LEX returned token TABSIZE\n"); return TABSIZE; }
title     { printf("LEX returned token TITLE\n"); return TITLE; }
author    { printf("LEX returned token AUTHOR\n"); return AUTHOR; }
date      { printf("LEX returned token DATE\n"); return DATE; }

(((0[1-9]|[12][0-9]|30)[-/ ]?(0[13-9]|1[012])|31[-/ ]?(0[13578]|1[02])|(0[1-9]|1[0-9]|2[0-8])[-/ ]?02)[-/ ]?[0-9]{4}|29[-/ ]?02[-/ ]?([0-9]{2}(([2468][048]|[02468][48])|[13579][26])|([13579][26]|[02468][048]|0[0-9]|1[0-6])00))  { printf("LEX returned token DDMMYYYYDATE\n"); yylval.sValue = strdup(yytext); return DDMMYYYYDATE; } /* copy the text: yytext is overwritten by the next match; the parser owns the copy */
[0-9]*[1-9][0-9]*   { printf("LEX returned token INTEGER\n"); yylval.iValue = atoi(yytext); return INTEGER; }
\".*\"              { printf("LEX returned token STRING\n"); yylval.sValue = strdup(yytext); return STRING; } /* copied for the same reason; the value still includes the surrounding quotes */

    /* skip spaces and tabs which are not part of a string (only ' ' and '\t' are in this class; '\r' and '\n' are not) */
[ \t] ;

    /* any other single character is reported through yyerror(); note that in flex '.' matches any character except a newline */
. yyerror("invalid character");

%%

/*
 * Program entry point.
 *
 * Expects exactly two arguments: the input file to parse (argv[1]) and the
 * output file attached to yyout (argv[2]). Returns 0 when both files could
 * be opened and yyparse() succeeded, and 1 otherwise.
 */
int main(int argc, char *argv[]) {
    if (argc != 3) {
        /* A usage problem is not a syntax error, so yyerror() (which adds a line number) is not used here. */
        fprintf(stderr, "ERROR You need 2 args: inputFileName outputFileName\n");
        return 1;
    }

    /* Bail out on a failed fopen(): with a NULL yyin/yyout the scanner would fall back to stdin/stdout. */
    yyin = fopen(argv[1], "r");
    if (yyin == NULL) {
        perror(argv[1]);
        return 1;
    }

    yyout = fopen(argv[2], "w");
    if (yyout == NULL) {
        perror(argv[2]);
        fclose(yyin);
        return 1;
    }

    int status = yyparse();

    fclose(yyin);
    /* fclose() flushes the output file, so a failure here means the output is incomplete. */
    if (fclose(yyout) != 0) {
        perror(argv[2]);
        status = 1;
    }

    return status != 0;
}

这是我的 .y 文件:

%{
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include "y.tab.h"
    void yyerror(const char *);
    int yylex(void);

    /* "Connect" with the output file  */
    extern FILE *yyout;
    extern int  yylineno;

    /* An array with counters of how many times each of the 5 document properties appears in the input file. The order of the properties is defined in the enum below */
    int docPropertyCounters[5];

    /* An enumerated list with the 5 document properties */
    typedef enum {PAGE_SETUP, TAB_SIZE, DOC_TITLE, DOC_AUTHOR, DOC_DATE} document_property;

    /*
     * Returns the source-file spelling of a document property, e.g. "\tabsize()"
     * for TAB_SIZE. The table order follows the enum above.
     *
     * The backslashes are doubled on purpose: written with a single backslash,
     * "\t" and "\a" would be compiled as TAB and BEL characters and "\p" / "\d"
     * are not valid escape sequences at all.
     *
     * A value outside the enum range yields a placeholder string instead of
     * reading past the end of the table.
     */
    static inline char *stringFromDocPropertyEnum(document_property indexOfProperty) {
        static char *strings[] = { "\\pagesetup{}", "\\tabsize()", "\\title{}", "\\author{}", "\\date{}" };
        const size_t count = sizeof(strings) / sizeof(strings[0]);

        /* The cast also turns a negative value into a large one, so one test covers both ends. */
        if ((size_t)indexOfProperty >= count) {
            return "<unknown property>";
        }
        return strings[indexOfProperty];
    }
%}

%union { 
    int iValue;      /* integer value */ 
    char* sValue;      /* C-String */ 
}; 

%start file /* defining the start condition */

%token BSLASH LBRACE RBRACE LPAREN RPAREN COMMA

%token BEGIN_ END DOCUMENT /* BEGIN seems to be a reserved word so BEGIN_ was used instead */

%token PAGESETUP TABSIZE TITLE AUTHOR DATE

%token <iValue> INTEGER

%token <sValue> DDMMYYYYDATE STRING

%%

file: beginDocument docProperties endDocument
            {
                /*
                 * Once the whole document has been parsed, check that every
                 * document property appeared exactly once. yyerror() is not
                 * used because the line number does not need to be shown.
                 * Each message ends with a newline so that several
                 * diagnostics do not run together on one line.
                 */
                const size_t propertyCount = sizeof(docPropertyCounters) / sizeof(docPropertyCounters[0]);

                for (size_t i = 0; i < propertyCount; i++) {
                    const char *propertyName = stringFromDocPropertyEnum((document_property)i);

                    if (docPropertyCounters[i] < 1) {
                        fprintf(stderr, "SYNTAX ERROR: Your source file does not contain the required document property %s\n", propertyName);
                    } else if (docPropertyCounters[i] > 1) {
                        fprintf(stderr, "SYNTAX ERROR: Your source file contains more than one instance of the document property %s\n", propertyName);
                    }
                }
            }
          | /* An empty document is parsed to an empty document, no errors generated*/
          ;

beginDocument: BSLASH BEGIN_ LBRACE DOCUMENT RBRACE; /* \begin{document} */

docProperties: docProperties docProperty /* left-recursive list: zero or more properties, in any order */
               | /* empty */
               ;                

    /* required properties... there should be one instance of each in the input file; each alternative only counts its occurrences, the counts are checked in the action of the file rule */ 
docProperty:    pageSetupProperty { docPropertyCounters[PAGE_SETUP]++; }
                | tabSizeProperty { docPropertyCounters[TAB_SIZE]++; }
                | titleProperty   { docPropertyCounters[DOC_TITLE]++; }
                | authorProperty  { docPropertyCounters[DOC_AUTHOR]++; }
                | dateProperty    { docPropertyCounters[DOC_DATE]++; }
                ;   

pageSetupProperty: BSLASH PAGESETUP LBRACE INTEGER COMMA INTEGER RBRACE; /* \pagesetup{INTEGER,INTEGER} */

tabSizeProperty: BSLASH TABSIZE LPAREN INTEGER RPAREN; /* \tabsize(INTEGER) */

titleProperty: BSLASH TITLE LBRACE STRING RBRACE; /* \title{STRING} */

authorProperty: BSLASH AUTHOR LBRACE STRING RBRACE; /* \author{STRING} */

dateProperty: BSLASH DATE LBRACE DDMMYYYYDATE RBRACE; /* \date{DDMMYYYYDATE} */

endDocument: BSLASH END LBRACE DOCUMENT RBRACE; /* \end{document} */

%%

/* Called by the scanner at end of input; a non-zero result means there is no further input to switch to. */
int yywrap(void)
{
    enum { NO_MORE_INPUT = 1 };

    return NO_MORE_INPUT;
}

void yyerror(const char* str) 
{
    fprintf(stderr,"SYNTAX ERROR near line [%d]: %s\n",yylineno, str);
}

PS:我正在使用 Windows 10 和相当旧的 flex 版本 (2.5.4a)。

这些空行来自输入文件中的回车符和/或换行符(\r、\n):您没有把它们放进空白字符的模式里,所以没有任何规则匹配它们,而 flex 的默认规则会把未匹配的字符原样回显到 yyout。

也许你应该:

[ \t\r\n]      ;

另外,在 flex/bison 的规范文件中使用 C 风格注释时要小心:它们有时会被当作模式来解析。我总是建议学生只在真正的 C 代码(动作部分)中写 C 风格的注释。例如,最好这样写:

[ \t\r\n]      ;  /* skip whitespace which is not part of a string */

永远不要把注释写在别的位置。其他人可能不同意,但我发现这样做可以避免 flex 和 bison 中的很多麻烦。

PS:我还没有在你的代码上测试我的建议....