如何为一门小型语言使用 EBNF 文法?

How to use EBNF grammar for a small language?

我正在做一个按照 EBNF 文法解析文件的作业,对递归函数的工作方式有点困惑。

-前三个函数:

Prog ::= PROGRAM IDENT; DeclBlock ProgBody
DeclBlock ::= VAR {DeclStmt;}
DeclStmt ::= Ident {, Ident} : (Integer | Real | String) 

我知道应该先调用 Prog,再调用 DeclBlock,然后调用 DeclStmt。那么,是在看到分号时再次调用 DeclBlock,还是在看到另一个 IDENT 时再次调用?DeclStmt 也是同样的问题:是不是看到逗号就再次调用?

// Prog ::= PROGRAM IDENT; DeclBlock ProgBody
//
// Top-level rule of the parser. Reads the PROGRAM keyword, the program name
// and the ';' that follows it, then parses the declaration block, and finally
// the program body, which this function brackets by reading BEGIN before it
// and END after it.
//
// Every token read directly by this function is echoed to stdout.
//
// in   - stream the tokens are read from
// line - current line number, passed through to the tokenizer and ParseError
//
// Returns true when the whole program parses; on the first failure an error
// is reported through ParseError and false is returned.
bool Prog(istream& in, int& line){
    // PROGRAM keyword
    LexItem programTok = Parser::GetNextToken(in, line);
    cout << programTok << endl;
    if(programTok != PROGRAM){
        ParseError(line, "Missing PROGRAM");
        return false;
    }

    // Program name
    LexItem nameTok = Parser::GetNextToken(in, line);
    cout << nameTok << endl;
    if(nameTok != IDENT){
        ParseError(line, "Missing Program Name");
        return false;
    }

    // ';' closing the program header
    LexItem semiTok = Parser::GetNextToken(in, line);
    cout << semiTok << endl;
    if(semiTok != SEMICOL){
        ParseError(line, "Missing SemiColon");
        return false;
    }

    // Declaration section
    if(!DeclBlock(in, line)){
        ParseError(line, "Incorrect Declaration Section.");
        return false;
    }

    // BEGIN opening the program body
    LexItem beginTok = Parser::GetNextToken(in, line);
    cout << "here at b" << beginTok << endl;
    if(beginTok != BEGIN){
        ParseError(line, "Non-recognizable Program Block");
        ParseError(line, "Incorrect Program Section");
        return false;
    }

    // Statements of the program body
    if(!ProgBody(in, line)){
        ParseError(line, "Incorrect Program Block");
        return false;
    }

    // END closing the program body
    LexItem endTok = Parser::GetNextToken(in, line);
    cout << endTok << endl;
    if(endTok != END){
        ParseError(line, "Non-recognizable Program Block");
        ParseError(line, "Incorrect Program Section");
        return false;
    }

    return true;
}
// DeclBlock ::= VAR {DeclStmt;}
//
// Parses the declaration section: the VAR keyword followed by ZERO OR MORE
// declaration statements (the braces in the production mean repetition).
//
// DeclStmt, as written in this file, consumes the ';' that terminates each
// statement, so this function does not read the semicolon itself.
//
// in   - stream the tokens are read from
// line - current line number, passed through to the tokenizer and ParseError
//
// Returns true when VAR and every following declaration statement parse;
// otherwise reports through ParseError and returns false.
bool DeclBlock(istream& in, int& line){
    LexItem v = Parser::GetNextToken(in, line);

    if(v != VAR){
        ParseError(line, "Non-recognizable Declaration Block.");
        return false;
    }

    // Keep parsing declaration statements for as long as the next token can
    // start one, i.e. for as long as it is an identifier.
    while(true){
        LexItem next = Parser::GetNextToken(in, line);
        const bool startsDecl = (next.GetToken() == IDENT);

        // One-token lookahead: hand the token back so that whoever reads next
        // (DeclStmt, or the caller once the declarations are over) sees it.
        // NOTE(review): relies on Parser::PushBackToken, the push-back helper
        // referenced in DeclStmt - confirm it accepts a LexItem lvalue.
        Parser::PushBackToken(next);

        if(!startsDecl){
            break;
        }

        if(!DeclStmt(in, line)){
            ParseError(line, "Syntactic error in Declaration Block.");
            return false;
        }
    }

    return true;
}

// DeclStmt ::= Ident {, Ident} : (Integer | Real | String)
//
// Parses one declaration statement: a comma-separated list of identifiers,
// a ':', and a type name. This function also consumes the ';' that follows
// the type, even though the grammar lists that ';' under DeclBlock.
//
// Every declared name is recorded in defVar; declaring a name that is already
// present is an error.
//
// in   - stream the tokens are read from
// line - current line number, passed through to the tokenizer and ParseError
//
// Returns true when the statement (including its trailing ';') parses;
// otherwise reports through ParseError and returns false.
bool DeclStmt(istream& in, int& line){
    // Ident {, Ident} :
    while(true){
        LexItem ident = Parser::GetNextToken(in, line);

        // Only a real identifier may be declared; without this check any
        // token (e.g. ':' after a dangling comma) would be stored in defVar.
        if(ident != IDENT){
            ParseError(line, "Missing Identifier in Declaration");
            return false;
        }

        const auto name = ident.GetLexeme();
        if(defVar.find(name) != defVar.end()){
            ParseError(line, "Var cant be redeclared");
            return false;
        }
        defVar.insert({name, true});

        LexItem sep = Parser::GetNextToken(in, line);

        if(sep == COMMA){
            // Another identifier must follow; go round again.
            continue;
        }
        if(sep.GetToken() == IDENT){
            // Two identifiers in a row: the comma between them is missing.
            ParseError(line, "Unrecognized Input Pattern");
            return false;
        }
        if(sep != COLON){
            ParseError(line, "Missing Colon");
            return false;
        }
        break;  // ':' read, the identifier list is complete
    }

    // (Integer | Real | String)
    // The type is matched on the lexeme text, not on a token kind.
    // NOTE(review): presumably the tokenizer upper-cases keywords - confirm.
    LexItem type = Parser::GetNextToken(in, line);
    if(type.GetLexeme() != "REAL" && type.GetLexeme() != "INTEGER" && type.GetLexeme() != "STRING"){
        ParseError(line, "Incorrect Declaration Type.");
        return false;
    }

    // Terminating ';'
    LexItem semi = Parser::GetNextToken(in, line);
    if(semi != SEMICOL){
        ParseError(line, "Missing SemiColon");
        return false;
    }

    return true;
}

在这种语言中,DeclBlock 只会被调用一次,而 DeclStmt 可以被调用多次。DeclStmt 被定义为一个或多个 Ident,后跟 :,再跟一个类型,最后以 ; 结尾。

读取 DeclStmt 末尾的 ; 之后,你需要读取下一个标记来决定下一步做什么。如果下一个标记是另一个 Ident,就说明你正处于另一个 DeclStmt 的开头,因此应再次调用 DeclStmt。如果是其他标记,就说明你正处于 ProgBody 的开头。(最后这一点是我的假设。通常应该检查能够作为 ProgBody 开头的标记,但题目中没有给出 ProgBody 的定义。)

您的 DeclBlock 函数应该类似于

// DeclBlock ::= VAR {DeclStmt;}
//
// Sketch of the declaration-block rule: after VAR, parse declaration
// statements, each followed by a ';' read here, until the upcoming token is
// BEGIN. Returns true on success and false at the first problem.
bool DeclBlock(istream& in, int& line) {
    if (Parser::GetNextToken(in, line) != VAR) {
        return false;  // missing VAR
    }

    // Lookahead peeks at the upcoming token without consuming it.
    while (Parser::Lookahead(in, line) != BEGIN) {
        const bool stmtOk = DeclStmt(in, line);
        if (!stmtOk) {
            return false;  // error in the DeclStmt
        }

        if (Parser::GetNextToken(in, line) != SEMICOL) {
            return false;  // error -- missing semicolon
        }
    }

    return true;
}

关键是你必须有一个解析器前瞻(lookahead)函数,它可以在不消耗下一个标记的情况下把这个标记提供给你。