数学表达式的分词器
Tokenizer for math expression
我正在使用 StreamTokenizer
编写一个词法分析器来标记数学表达式。
作为输入,我给出表达式(1+π)²(1−π)²+(5.3−-2)/6
。
我希望它被标记为
( 1 + π ) ² ( 1 - π ) ² + ( 5.3 − - 2 ) / 6
但我得到 ( 1 +π ) ² ( 1 -π ) ² + ( 5.3 −-2 ) / 6
.
我知道我需要在输出的某些地方插入乘法运算符,稍后再做。
/* s: The inputted expression */
public static String tokenize(String s)[] throws IOException
{
StreamTokenizer tokenizer = new StreamTokenizer(new StringReader(s));
tokenizer.parseNumbers();
tokenizer.wordChars('a', 'z');
tokenizer.wordChars('A', 'Z');
tokenizer.wordChars('A', 'Z');
tokenizer.wordChars(SQUARED, SQUARED); // the superscript 2
tokenizer.wordChars(PI, PI);
tokenizer.wordChars(SUB.charAt(0), SUB.charAt(0)); // subtract (takeaway)
tokenizer.wordChars(NEG.charAt(0), NEG.charAt(0)); // negate
tokenizer.wordChars('/', '/');
tokenizer.wordChars('*', '*');
tokenizer.wordChars('+', '+');
tokenizer.ordinaryChar(',');
tokenizer.ordinaryChar('/'); // do not consider / as comment start
ArrayList<String> tokBuf = new ArrayList<>();
while (tokenizer.nextToken() != StreamTokenizer.TT_EOF) {
switch (tokenizer.ttype) {
case StreamTokenizer.TT_NUMBER:
tokBuf.add(String.valueOf(tokenizer.nval));
break;
case StreamTokenizer.TT_WORD:
tokBuf.add(tokenizer.sval);
break;
default:
tokBuf.add(String.valueOf((char) tokenizer.ttype));
}
}
String ret[] = new String[tokBuf.size()];
ret = tokBuf.toArray(ret);
return ret;
}
enum TokType {
FIRST,
OPERAND,
OPERATOR,
LPAREN,
RPAREN,
}
boolean shouldMultBeEmitted(TokType tt)
{
return tt == TokType.OPERAND || tt == TokType.RPAREN;
}
public ArrayList<String> tokenize(String in)
{
TokType prevTok = TokType.FIRST; /* keep track of the type of the prev. tok, so
we know when to insert a mult. sign */
String regex = "(?<=[-−+*/()])|(?=[-−+*/()])";
String toks[] = in.split(regex);
/* the string has been tokenized; insert any needed multiplication signs */
ArrayList<String> ret = new ArrayList<>();
for (String x : toks) {
if (isNumeric(x) || x.equals("Ans") || x.equals("π")) {
if (shouldMultBeEmitted(prevTok))
ret.add("*");
prevTok = TokType.OPERAND;
}
else if (x.equals(LPAREN)) {
if (shouldMultBeEmitted(prevTok))
ret.add("*");
prevTok = TokType.LPAREN;
}
else if (x.equals(RPAREN))
prevTok = TokType.RPAREN;
else if (isOperator(x))
prevTok = TokType.OPERATOR;
ret.add(x);
}
return ret;
}
感谢 提供正则表达式。
练习 reader:在平方 (²
) 运算符后插入乘号,因此您在问题中提供的示例是正确的。
我正在使用 StreamTokenizer
编写一个词法分析器来标记数学表达式。
作为输入,我给出表达式(1+π)²(1−π)²+(5.3−-2)/6
。
我希望它被标记为
( 1 + π ) ² ( 1 - π ) ² + ( 5.3 − - 2 ) / 6
但我得到 ( 1 +π ) ² ( 1 -π ) ² + ( 5.3 −-2 ) / 6
.
我知道我需要在输出的某些地方插入乘法运算符,稍后再做。
/* s: The inputted expression */
public static String tokenize(String s)[] throws IOException
{
StreamTokenizer tokenizer = new StreamTokenizer(new StringReader(s));
tokenizer.parseNumbers();
tokenizer.wordChars('a', 'z');
tokenizer.wordChars('A', 'Z');
tokenizer.wordChars('A', 'Z');
tokenizer.wordChars(SQUARED, SQUARED); // the superscript 2
tokenizer.wordChars(PI, PI);
tokenizer.wordChars(SUB.charAt(0), SUB.charAt(0)); // subtract (takeaway)
tokenizer.wordChars(NEG.charAt(0), NEG.charAt(0)); // negate
tokenizer.wordChars('/', '/');
tokenizer.wordChars('*', '*');
tokenizer.wordChars('+', '+');
tokenizer.ordinaryChar(',');
tokenizer.ordinaryChar('/'); // do not consider / as comment start
ArrayList<String> tokBuf = new ArrayList<>();
while (tokenizer.nextToken() != StreamTokenizer.TT_EOF) {
switch (tokenizer.ttype) {
case StreamTokenizer.TT_NUMBER:
tokBuf.add(String.valueOf(tokenizer.nval));
break;
case StreamTokenizer.TT_WORD:
tokBuf.add(tokenizer.sval);
break;
default:
tokBuf.add(String.valueOf((char) tokenizer.ttype));
}
}
String ret[] = new String[tokBuf.size()];
ret = tokBuf.toArray(ret);
return ret;
}
enum TokType {
FIRST,
OPERAND,
OPERATOR,
LPAREN,
RPAREN,
}
boolean shouldMultBeEmitted(TokType tt)
{
return tt == TokType.OPERAND || tt == TokType.RPAREN;
}
public ArrayList<String> tokenize(String in)
{
TokType prevTok = TokType.FIRST; /* keep track of the type of the prev. tok, so
we know when to insert a mult. sign */
String regex = "(?<=[-−+*/()])|(?=[-−+*/()])";
String toks[] = in.split(regex);
/* the string has been tokenized; insert any needed multiplication signs */
ArrayList<String> ret = new ArrayList<>();
for (String x : toks) {
if (isNumeric(x) || x.equals("Ans") || x.equals("π")) {
if (shouldMultBeEmitted(prevTok))
ret.add("*");
prevTok = TokType.OPERAND;
}
else if (x.equals(LPAREN)) {
if (shouldMultBeEmitted(prevTok))
ret.add("*");
prevTok = TokType.LPAREN;
}
else if (x.equals(RPAREN))
prevTok = TokType.RPAREN;
else if (isOperator(x))
prevTok = TokType.OPERATOR;
ret.add(x);
}
return ret;
}
感谢 提供正则表达式。
练习 reader:在平方 (²
) 运算符后插入乘号,因此您在问题中提供的示例是正确的。