如何为一门小型语言使用 EBNF 文法?
How to use EBNF grammar for a small language?
我正在做一个按照 EBNF 文法解析文件的作业,但对递归函数的工作方式有点困惑。
-前三个函数:
Prog ::= PROGRAM IDENT; DeclBlock ProgBody
DeclBlock ::= VAR {DeclStmt;}
DeclStmt ::= Ident {, Ident} : (Integer | Real | String)
我知道您应该先调用 Prog,然后调用 DeclBlock,然后调用 DeclStmt。那么调用DeclBlock时,是看到分号再调用还是看到另一个IDENT再调用? DeclStmt也是一样,是不是看到逗号再调用?
//Prog ::= PROGRAM IDENT; DeclBlock ProgBody
/**
 * Entry point of the parser: recognises
 *     PROGRAM IDENT ; DeclBlock BEGIN ProgBody END
 *
 * The BEGIN and END tokens are consumed here, around the call to ProgBody.
 * Every token read by this function is echoed to cout as a debug trace.
 *
 * @param in   input stream handed to Parser::GetNextToken
 * @param line line counter handed to Parser::GetNextToken and used when
 *             reporting errors (presumably advanced by the lexer — confirm)
 * @return true when the whole program was recognised; false after the first
 *         error, which is reported through ParseError
 */
bool Prog(istream& in, int& line){
    // PROGRAM keyword
    LexItem keyword = Parser::GetNextToken(in, line);
    cout << keyword << endl;
    if(keyword != PROGRAM){
        ParseError(line, "Missing PROGRAM");
        return false;
    }

    // program name
    LexItem progName = Parser::GetNextToken(in, line);
    cout << progName << endl;
    if(progName != IDENT){
        ParseError(line, "Missing Program Name");
        return false;
    }

    // ';' closing the header
    LexItem headerEnd = Parser::GetNextToken(in, line);
    cout << headerEnd << endl;
    if(headerEnd != SEMICOL){
        ParseError(line, "Missing SemiColon");
        return false;
    }

    // declaration section
    if(!DeclBlock(in, line)){
        ParseError(line, "Incorrect Declaration Section.");
        return false;
    }

    // BEGIN opens the program body
    LexItem opener = Parser::GetNextToken(in, line);
    cout << "here at b" << opener << endl;
    if(opener != BEGIN){
        ParseError(line, "Non-recognizable Program Block");
        ParseError(line, "Incorrect Program Section");
        return false;
    }

    if(!ProgBody(in, line)){
        ParseError(line, "Incorrect Program Block");
        return false;
    }

    // END closes the program body
    LexItem closer = Parser::GetNextToken(in, line);
    cout << closer << endl;
    if(closer != END){
        ParseError(line, "Non-recognizable Program Block");
        ParseError(line, "Incorrect Program Section");
        return false;
    }

    return true;
}
//DeclBlock ::= VAR {DeclStmt;}
/**
 * Parses the declaration section: VAR followed by zero or more declaration
 * statements.
 *
 * The previous version called DeclStmt exactly once, so a second declaration
 * line was never parsed. The braces in the grammar mean "repeat zero or more
 * times", so this version keeps calling DeclStmt for as long as the next
 * token is an identifier.
 *
 * DeclStmt in this file consumes its own trailing ';', so the semicolon is
 * not read again here.
 *
 * @param in   input stream handed to Parser::GetNextToken
 * @param line line counter handed to the parser and used for error reports
 * @return true when VAR and every following declaration statement were
 *         recognised; false after the first error (reported via ParseError)
 */
bool DeclBlock(istream& in, int& line){
    LexItem v = Parser::GetNextToken(in, line);
    cout << v << endl;
    if(v != VAR){
        ParseError(line, "Non-recognizable Declaration Block.");
        return false;
    }

    while(true){
        // Peek at the next token: read it, then hand it straight back.
        // NOTE(review): assumes Parser::PushBackToken makes the next
        // GetNextToken call return this same token — confirm against Parser.
        LexItem next = Parser::GetNextToken(in, line);
        Parser::PushBackToken(next);

        // Anything that is not an identifier cannot start a DeclStmt, so the
        // declaration block is over; the caller decides what must follow.
        if(next.GetToken() != IDENT){
            return true;
        }

        if(!DeclStmt(in, line)){
            ParseError(line, "Syntactic error in Declaration Block.");
            return false;
        }
    }
}
//DeclStmt ::= Ident {, Ident} : (Integer | Real | String)
/**
 * Parses one declaration statement:
 *     Ident {, Ident} : (Integer | Real | String) ;
 *
 * Each call handles one identifier. After a comma the function recurses to
 * handle the next identifier, and the innermost call consumes the colon, the
 * type name and the terminating semicolon. Each declared name is recorded in
 * defVar, and a name that is already present is rejected.
 *
 * Fix over the previous version: the first token is now required to be an
 * IDENT before it is recorded. Previously any token (for example a keyword or
 * a stray ':') was inserted into defVar as if it were a variable name.
 *
 * Every token read is echoed to cout as a debug trace.
 *
 * @param in   input stream handed to Parser::GetNextToken
 * @param line line counter handed to the parser and used for error reports
 * @return true when the statement, including its ';', was recognised;
 *         false after the first error (reported via ParseError)
 */
bool DeclStmt(istream& in, int& line){
    LexItem tok = Parser::GetNextToken(in, line);
    cout << "here too " << tok << endl;

    // Only identifiers may be declared.
    if(tok.GetToken() != IDENT){
        ParseError(line, "Missing Identifier");
        return false;
    }

    if (defVar.find(tok.GetLexeme()) != defVar.end()) {
        cout << "Var Exists!" << endl;
        ParseError(line, "Var cant be redeclared");
        return false;
    }
    defVar.insert({tok.GetLexeme(), true});

    LexItem c = Parser::GetNextToken(in, line);
    cout << c << endl;

    // A comma means another identifier follows; the recursive call handles it
    // and, eventually, the ": type ;" tail.
    if(c == COMMA){
        return DeclStmt(in, line);
    }

    // Two identifiers in a row without a separating comma.
    if(c.GetToken() == IDENT){
        ParseError(line, "Unrecognized Input Pattern");
        cout << "( here " << c.GetLexeme() << ")" << endl;
        return false;
    }

    if(c != COLON){
        ParseError(line, "Missing Colon");
        return false;
    }

    // The type is matched by lexeme text.
    // NOTE(review): this presumes the lexer yields upper-case lexemes for the
    // type keywords — confirm.
    LexItem t = Parser::GetNextToken(in, line);
    cout << "here t " << t.GetLexeme() << endl;
    if(t.GetLexeme() != "REAL" && t.GetLexeme() != "INTEGER" && t.GetLexeme() != "STRING"){
        ParseError(line, "Incorrect Declaration Type.");
        return false;
    }

    LexItem semi = Parser::GetNextToken(in,line);
    cout << semi << endl;
    if(semi != SEMICOL){
        ParseError(line, "Missing SemiColon");
        return false;
    }
    return true;
}
在这种语言中,DeclBlock 只会被调用一次,而 DeclStmt 可以被调用多次。DeclStmt 被定义为一个或多个 Ident,后跟 `:`,再跟一个类型,最后以 `;` 结尾。
读取 DeclStmt 末尾的 `;` 之后,需要再读取下一个标记(token)来决定下一步做什么。如果下一个标记是另一个 Ident,说明你正处在另一个 DeclStmt 的开头,因此应再次调用 DeclStmt;否则,说明你已经到达 ProgBody 的开头。(最后这一点是我的假设:通常应当检查能够作为 ProgBody 开头的标记,但题目中没有给出。)
您的 DeclBlock 函数应该类似于:
/**
 * Sketch of DeclBlock driven by one token of lookahead.
 *
 * After VAR, declaration statements are parsed until the lookahead token is
 * BEGIN. In this variant the ';' after each DeclStmt is consumed here, not
 * inside DeclStmt.
 *
 * @param in   input stream handed to the parser
 * @param line line counter handed to the parser
 * @return true once BEGIN is seen as the lookahead token; false if VAR is
 *         missing, a DeclStmt fails, or a ';' is missing
 */
bool DeclBlock(istream& in, int& line) {
    LexItem opening = Parser::GetNextToken(in, line);
    if (opening != VAR) {
        // missing VAR
        return false;
    }

    for (;;) {
        // Peek without consuming: BEGIN marks the end of the declarations.
        const bool moreDeclarations = Parser::Lookahead(in, line) != BEGIN;
        if (!moreDeclarations) {
            return true;
        }

        const bool stmtOk = DeclStmt(in, line);
        if (!stmtOk) {
            // error in the DeclStmt
            return false;
        }

        LexItem terminator = Parser::GetNextToken(in, line);
        if (terminator != SEMICOL) {
            // error -- missing semicolon
            return false;
        }
    }
}
关键在于,你的解析器必须提供一个"先行"(lookahead)函数:它能在不消耗下一个标记的情况下,把该标记返回给你。
我正在做一个按照 EBNF 文法解析文件的作业,但对递归函数的工作方式有点困惑。
-前三个函数:
Prog ::= PROGRAM IDENT; DeclBlock ProgBody
DeclBlock ::= VAR {DeclStmt;}
DeclStmt ::= Ident {, Ident} : (Integer | Real | String)
我知道您应该先调用 Prog,然后调用 DeclBlock,然后调用 DeclStmt。那么调用DeclBlock时,是看到分号再调用还是看到另一个IDENT再调用? DeclStmt也是一样,是不是看到逗号再调用?
//Prog ::= PROGRAM IDENT; DeclBlock ProgBody
/**
 * Entry point of the parser: recognises
 *     PROGRAM IDENT ; DeclBlock BEGIN ProgBody END
 *
 * The BEGIN and END tokens are consumed here, around the call to ProgBody.
 * Every token read by this function is echoed to cout as a debug trace.
 *
 * @param in   input stream handed to Parser::GetNextToken
 * @param line line counter handed to Parser::GetNextToken and used when
 *             reporting errors (presumably advanced by the lexer — confirm)
 * @return true when the whole program was recognised; false after the first
 *         error, which is reported through ParseError
 */
bool Prog(istream& in, int& line){
    // PROGRAM keyword
    LexItem keyword = Parser::GetNextToken(in, line);
    cout << keyword << endl;
    if(keyword != PROGRAM){
        ParseError(line, "Missing PROGRAM");
        return false;
    }

    // program name
    LexItem progName = Parser::GetNextToken(in, line);
    cout << progName << endl;
    if(progName != IDENT){
        ParseError(line, "Missing Program Name");
        return false;
    }

    // ';' closing the header
    LexItem headerEnd = Parser::GetNextToken(in, line);
    cout << headerEnd << endl;
    if(headerEnd != SEMICOL){
        ParseError(line, "Missing SemiColon");
        return false;
    }

    // declaration section
    if(!DeclBlock(in, line)){
        ParseError(line, "Incorrect Declaration Section.");
        return false;
    }

    // BEGIN opens the program body
    LexItem opener = Parser::GetNextToken(in, line);
    cout << "here at b" << opener << endl;
    if(opener != BEGIN){
        ParseError(line, "Non-recognizable Program Block");
        ParseError(line, "Incorrect Program Section");
        return false;
    }

    if(!ProgBody(in, line)){
        ParseError(line, "Incorrect Program Block");
        return false;
    }

    // END closes the program body
    LexItem closer = Parser::GetNextToken(in, line);
    cout << closer << endl;
    if(closer != END){
        ParseError(line, "Non-recognizable Program Block");
        ParseError(line, "Incorrect Program Section");
        return false;
    }

    return true;
}
//DeclBlock ::= VAR {DeclStmt;}
/**
 * Parses the declaration section: VAR followed by zero or more declaration
 * statements.
 *
 * The previous version called DeclStmt exactly once, so a second declaration
 * line was never parsed. The braces in the grammar mean "repeat zero or more
 * times", so this version keeps calling DeclStmt for as long as the next
 * token is an identifier.
 *
 * DeclStmt in this file consumes its own trailing ';', so the semicolon is
 * not read again here.
 *
 * @param in   input stream handed to Parser::GetNextToken
 * @param line line counter handed to the parser and used for error reports
 * @return true when VAR and every following declaration statement were
 *         recognised; false after the first error (reported via ParseError)
 */
bool DeclBlock(istream& in, int& line){
    LexItem v = Parser::GetNextToken(in, line);
    cout << v << endl;
    if(v != VAR){
        ParseError(line, "Non-recognizable Declaration Block.");
        return false;
    }

    while(true){
        // Peek at the next token: read it, then hand it straight back.
        // NOTE(review): assumes Parser::PushBackToken makes the next
        // GetNextToken call return this same token — confirm against Parser.
        LexItem next = Parser::GetNextToken(in, line);
        Parser::PushBackToken(next);

        // Anything that is not an identifier cannot start a DeclStmt, so the
        // declaration block is over; the caller decides what must follow.
        if(next.GetToken() != IDENT){
            return true;
        }

        if(!DeclStmt(in, line)){
            ParseError(line, "Syntactic error in Declaration Block.");
            return false;
        }
    }
}
//DeclStmt ::= Ident {, Ident} : (Integer | Real | String)
/**
 * Parses one declaration statement:
 *     Ident {, Ident} : (Integer | Real | String) ;
 *
 * Each call handles one identifier. After a comma the function recurses to
 * handle the next identifier, and the innermost call consumes the colon, the
 * type name and the terminating semicolon. Each declared name is recorded in
 * defVar, and a name that is already present is rejected.
 *
 * Fix over the previous version: the first token is now required to be an
 * IDENT before it is recorded. Previously any token (for example a keyword or
 * a stray ':') was inserted into defVar as if it were a variable name.
 *
 * Every token read is echoed to cout as a debug trace.
 *
 * @param in   input stream handed to Parser::GetNextToken
 * @param line line counter handed to the parser and used for error reports
 * @return true when the statement, including its ';', was recognised;
 *         false after the first error (reported via ParseError)
 */
bool DeclStmt(istream& in, int& line){
    LexItem tok = Parser::GetNextToken(in, line);
    cout << "here too " << tok << endl;

    // Only identifiers may be declared.
    if(tok.GetToken() != IDENT){
        ParseError(line, "Missing Identifier");
        return false;
    }

    if (defVar.find(tok.GetLexeme()) != defVar.end()) {
        cout << "Var Exists!" << endl;
        ParseError(line, "Var cant be redeclared");
        return false;
    }
    defVar.insert({tok.GetLexeme(), true});

    LexItem c = Parser::GetNextToken(in, line);
    cout << c << endl;

    // A comma means another identifier follows; the recursive call handles it
    // and, eventually, the ": type ;" tail.
    if(c == COMMA){
        return DeclStmt(in, line);
    }

    // Two identifiers in a row without a separating comma.
    if(c.GetToken() == IDENT){
        ParseError(line, "Unrecognized Input Pattern");
        cout << "( here " << c.GetLexeme() << ")" << endl;
        return false;
    }

    if(c != COLON){
        ParseError(line, "Missing Colon");
        return false;
    }

    // The type is matched by lexeme text.
    // NOTE(review): this presumes the lexer yields upper-case lexemes for the
    // type keywords — confirm.
    LexItem t = Parser::GetNextToken(in, line);
    cout << "here t " << t.GetLexeme() << endl;
    if(t.GetLexeme() != "REAL" && t.GetLexeme() != "INTEGER" && t.GetLexeme() != "STRING"){
        ParseError(line, "Incorrect Declaration Type.");
        return false;
    }

    LexItem semi = Parser::GetNextToken(in,line);
    cout << semi << endl;
    if(semi != SEMICOL){
        ParseError(line, "Missing SemiColon");
        return false;
    }
    return true;
}
在这种语言中,DeclBlock 只会被调用一次,而 DeclStmt 可以被调用多次。DeclStmt 被定义为一个或多个 Ident,后跟 `:`,再跟一个类型,最后以 `;` 结尾。
读取 DeclStmt 末尾的 `;` 之后,需要再读取下一个标记(token)来决定下一步做什么。如果下一个标记是另一个 Ident,说明你正处在另一个 DeclStmt 的开头,因此应再次调用 DeclStmt;否则,说明你已经到达 ProgBody 的开头。(最后这一点是我的假设:通常应当检查能够作为 ProgBody 开头的标记,但题目中没有给出。)
您的 DeclBlock 函数应该类似于:
/**
 * Sketch of DeclBlock driven by one token of lookahead.
 *
 * After VAR, declaration statements are parsed until the lookahead token is
 * BEGIN. In this variant the ';' after each DeclStmt is consumed here, not
 * inside DeclStmt.
 *
 * @param in   input stream handed to the parser
 * @param line line counter handed to the parser
 * @return true once BEGIN is seen as the lookahead token; false if VAR is
 *         missing, a DeclStmt fails, or a ';' is missing
 */
bool DeclBlock(istream& in, int& line) {
    LexItem opening = Parser::GetNextToken(in, line);
    if (opening != VAR) {
        // missing VAR
        return false;
    }

    for (;;) {
        // Peek without consuming: BEGIN marks the end of the declarations.
        const bool moreDeclarations = Parser::Lookahead(in, line) != BEGIN;
        if (!moreDeclarations) {
            return true;
        }

        const bool stmtOk = DeclStmt(in, line);
        if (!stmtOk) {
            // error in the DeclStmt
            return false;
        }

        LexItem terminator = Parser::GetNextToken(in, line);
        if (terminator != SEMICOL) {
            // error -- missing semicolon
            return false;
        }
    }
}
关键在于,你的解析器必须提供一个"先行"(lookahead)函数:它能在不消耗下一个标记的情况下,把该标记返回给你。