不能完全让词法分析器在我的 Java 程序中工作
Can't quite get the Lexical Analyzer to work in my Java program
我正在尝试获取这段 Java 代码以对短语“(sum + 47) / total”进行词法分析并将其输出为:
Next token is: 25 Next lexeme is (
Next token is: 11 Next lexeme is sum
Next token is: 21 Next lexeme is +
Next token is: 10 Next lexeme is 47
Next token is: 26 Next lexeme is )
Next token is: 24 Next lexeme is /
Next token is: 11 Next lexeme is total
Next token is: -1 Next lexeme is EOF
然而,结果却是这样的:
Next token is: 25 Next lexeme is (
Next token is: 11 Next lexeme is um
Next token is: 21 Next lexeme is +
Next token is: 10 Next lexeme is 47
Next token is: 24 Next lexeme is /
Next token is: 11 Next lexeme is total
我知道我搞砸了一些东西让 EOF 不显示,但我不明白为什么它在 sum 和 47 之后切断了“s”和“)”。这是我的代码供参考。如果我需要对此做任何事情,请告诉我 post,因为这是我的第一个。
import java.io.*;
import java.util.*;
public class Main
{
private static final int LETTER=0;
private static final int DIGIT=1;
private static final int UNKNOWN=99;
private static final int EOF=-1;
private static final int INT_LIT=10;
private static final int IDENT=11;
private static final int ASSIGN_OP=20;
private static final int ADD_OP=21;
private static final int SUB_OP=22;
private static final int MULT_OP=23;
private static final int DIV_OP=24;
private static final int LEFT_PAREN=25;
private static final int RIGHT_PAREN=26;
private static int charClass;
private static char lexeme[];
private static char nextChar;
private static int lexLen;
private static int token;
private static int nextToken;
private static File file;
private static FileInputStream fis;
public static int lookup(char ch)
{
switch (ch)
{
case '(':
addChar();
nextToken = LEFT_PAREN;
break;
case ')':
addChar();
nextToken = RIGHT_PAREN;
break;
case '+':
addChar();
nextToken = ADD_OP;
break;
case '-':
addChar();
nextToken = SUB_OP;
break;
case '*':
addChar();
nextToken = MULT_OP;
break;
case '/':
addChar();
nextToken = DIV_OP;
break;
default:
addChar();
nextToken = EOF;
break;
}
return nextToken;
}
public static void addChar()
{
if (lexLen <= 98)
{
lexeme[lexLen++] = nextChar;
lexeme[lexLen] = 0;
}
else
System.out.println("Error -lexeme is too long\n");
}
public static void getChar()
{
try
{
if(fis.available()>0)
{
nextChar=(char)fis.read();
if(Character.isLetter(nextChar))
charClass=LETTER;
else if(Character.isDigit(nextChar))
charClass=DIGIT;
else
charClass=UNKNOWN;
}
else
charClass=EOF;
}
catch(IOException e)
{
e.printStackTrace();
}
}
public static void getNonBlank()
{
while(Character.isSpaceChar(nextChar))
getChar();
}
public static int lex()
{
lexLen = 0;
getNonBlank();
switch (charClass)
{
/* parse identifiers */
case LETTER:
addChar();
getChar();
while (charClass == LETTER || charClass == DIGIT)
{
addChar();
getChar();
}
nextToken = IDENT;
break;
/* parse integer literals and integers */
case DIGIT:
addChar();
getChar();
while(charClass == DIGIT)
{
addChar();
getChar();
}
nextToken = INT_LIT;
break;
/* parentheses and operators */
case UNKNOWN:
lookup(nextChar);
getChar();
break;
/* EOF */
case EOF:
nextToken = EOF;
break;
} /* end of switch */
System.out.print("Next token is :"+nextToken+" Next lexeme is :");
for(int i=0;i<lexLen;i++)
System.out.print(lexeme[i]);
System.out.println();
return nextToken;
}
public static void main(String args[])
{
lexLen=0;
lexeme=new char[100];
for(int i=0;i<100;i++)
lexeme[i]='0';
file = new File("input1.txt");
if (!file.exists())
{
System.out.println( "input1.txt does not exist.");
return;
}
if (!(file.isFile() && file.canRead()))
{
System.out.println(file.getName() + " cannot be read.");
return;
}
try
{
fis = new FileInputStream(file);
char current;
while (fis.available() > 0)
{
getChar();
// System.out.println(nextChar+" "+charClass);
lex();
}
}
catch (IOException e)
{
e.printStackTrace();
}
}
}
掉落的字符错误和丢失的 EOF 错误都发生在这个循环中:
while (fis.available() > 0)
{
getChar();
lex();
}
你应该能够通过在纸上简单输入执行该循环来解决问题。 (例如,尝试 ()
后跟 end-of-file。)
这两个问题的关键在于 lex
的契约——即世界在它执行前后应该是什么样子的规范——包括:
- 前置条件(调用
lex
时必须为真):nextChar
是下一个可用的输入字符,charClass
是它的class.
- postcondition(
lex
保证调用后为真):nextChar
是下一个可用的输入字符,charClass
是它的 class.
请注意,这些是相同的,这并不少见。这通常被称为不变。
另一方面,getChar
的合同是:
- 先决条件:不再需要
nextChar
和charClass
的值。
- postcondition:
nextChar
是下一个可用的输入字符,charClass
是它的class.
明确记录您编写的每个函数的契约始终是一个好习惯。这样做将帮助您发现问题。特别是,鉴于 lex
的后置条件和 getChar
的前置条件(将在下一次循环迭代开始时调用),您能说些什么?
如果您将 end-of-file 指标的条件添加到上述模型中,您可能也会看到该错误。
我正在尝试获取这段 Java 代码以对短语“(sum + 47) / total”进行词法分析并将其输出为:
Next token is: 25 Next lexeme is (
Next token is: 11 Next lexeme is sum
Next token is: 21 Next lexeme is +
Next token is: 10 Next lexeme is 47
Next token is: 26 Next lexeme is )
Next token is: 24 Next lexeme is /
Next token is: 11 Next lexeme is total
Next token is: -1 Next lexeme is EOF
然而,结果却是这样的:
Next token is: 25 Next lexeme is (
Next token is: 11 Next lexeme is um
Next token is: 21 Next lexeme is +
Next token is: 10 Next lexeme is 47
Next token is: 24 Next lexeme is /
Next token is: 11 Next lexeme is total
我知道我搞砸了一些东西让 EOF 不显示,但我不明白为什么它在 sum 和 47 之后切断了“s”和“)”。这是我的代码供参考。如果我需要对此做任何事情,请告诉我 post,因为这是我的第一个。
import java.io.*;
import java.util.*;
public class Main
{
private static final int LETTER=0;
private static final int DIGIT=1;
private static final int UNKNOWN=99;
private static final int EOF=-1;
private static final int INT_LIT=10;
private static final int IDENT=11;
private static final int ASSIGN_OP=20;
private static final int ADD_OP=21;
private static final int SUB_OP=22;
private static final int MULT_OP=23;
private static final int DIV_OP=24;
private static final int LEFT_PAREN=25;
private static final int RIGHT_PAREN=26;
private static int charClass;
private static char lexeme[];
private static char nextChar;
private static int lexLen;
private static int token;
private static int nextToken;
private static File file;
private static FileInputStream fis;
public static int lookup(char ch)
{
switch (ch)
{
case '(':
addChar();
nextToken = LEFT_PAREN;
break;
case ')':
addChar();
nextToken = RIGHT_PAREN;
break;
case '+':
addChar();
nextToken = ADD_OP;
break;
case '-':
addChar();
nextToken = SUB_OP;
break;
case '*':
addChar();
nextToken = MULT_OP;
break;
case '/':
addChar();
nextToken = DIV_OP;
break;
default:
addChar();
nextToken = EOF;
break;
}
return nextToken;
}
public static void addChar()
{
if (lexLen <= 98)
{
lexeme[lexLen++] = nextChar;
lexeme[lexLen] = 0;
}
else
System.out.println("Error -lexeme is too long\n");
}
public static void getChar()
{
try
{
if(fis.available()>0)
{
nextChar=(char)fis.read();
if(Character.isLetter(nextChar))
charClass=LETTER;
else if(Character.isDigit(nextChar))
charClass=DIGIT;
else
charClass=UNKNOWN;
}
else
charClass=EOF;
}
catch(IOException e)
{
e.printStackTrace();
}
}
public static void getNonBlank()
{
while(Character.isSpaceChar(nextChar))
getChar();
}
public static int lex()
{
lexLen = 0;
getNonBlank();
switch (charClass)
{
/* parse identifiers */
case LETTER:
addChar();
getChar();
while (charClass == LETTER || charClass == DIGIT)
{
addChar();
getChar();
}
nextToken = IDENT;
break;
/* parse integer literals and integers */
case DIGIT:
addChar();
getChar();
while(charClass == DIGIT)
{
addChar();
getChar();
}
nextToken = INT_LIT;
break;
/* parentheses and operators */
case UNKNOWN:
lookup(nextChar);
getChar();
break;
/* EOF */
case EOF:
nextToken = EOF;
break;
} /* end of switch */
System.out.print("Next token is :"+nextToken+" Next lexeme is :");
for(int i=0;i<lexLen;i++)
System.out.print(lexeme[i]);
System.out.println();
return nextToken;
}
public static void main(String args[])
{
lexLen=0;
lexeme=new char[100];
for(int i=0;i<100;i++)
lexeme[i]='0';
file = new File("input1.txt");
if (!file.exists())
{
System.out.println( "input1.txt does not exist.");
return;
}
if (!(file.isFile() && file.canRead()))
{
System.out.println(file.getName() + " cannot be read.");
return;
}
try
{
fis = new FileInputStream(file);
char current;
while (fis.available() > 0)
{
getChar();
// System.out.println(nextChar+" "+charClass);
lex();
}
}
catch (IOException e)
{
e.printStackTrace();
}
}
}
掉落的字符错误和丢失的 EOF 错误都发生在这个循环中:
while (fis.available() > 0)
{
getChar();
lex();
}
你应该能够通过在纸上简单输入执行该循环来解决问题。 (例如,尝试 ()
后跟 end-of-file。)
这两个问题的关键在于 lex
的契约——即世界在它执行前后应该是什么样子的规范——包括:
- 前置条件(调用
lex
时必须为真):nextChar
是下一个可用的输入字符,charClass
是它的class. - postcondition(
lex
保证调用后为真):nextChar
是下一个可用的输入字符,charClass
是它的 class.
请注意,这些是相同的,这并不少见。这通常被称为不变。
另一方面,getChar
的合同是:
- 先决条件:不再需要
nextChar
和charClass
的值。 - postcondition:
nextChar
是下一个可用的输入字符,charClass
是它的class.
明确记录您编写的每个函数的契约始终是一个好习惯。这样做将帮助您发现问题。特别是,鉴于 lex
的后置条件和 getChar
的前置条件(将在下一次循环迭代开始时调用),您能说些什么?
如果您将 end-of-file 指标的条件添加到上述模型中,您可能也会看到该错误。