数学表达式的分词器

Tokenizer for math expression

我正在使用 StreamTokenizer 编写一个词法分析器来标记数学表达式。

作为输入,我给出表达式(1+π)²(1−π)²+(5.3−-2)/6。 我希望它被标记为
( 1 + π ) ² ( 1 - π ) ² + ( 5.3 − - 2 ) / 6
但我得到 ( 1 +π ) ² ( 1 -π ) ² + ( 5.3 −-2 ) / 6.

我知道我需要在输出的某些地方插入乘法运算符,稍后再做。

/**
 * Breaks a math expression into string tokens with a {@link StreamTokenizer}.
 *
 * <p>Number tokens are returned as the string form of their {@code double}
 * value, word tokens are returned unchanged, and every other character is
 * returned as a one-character string.
 *
 * <p>NOTE: '+', '*' and the symbols taken from SQUARED, PI, SUB and NEG are
 * registered as word constituents below, so a run of such characters with no
 * separator between them comes back as a single word token.
 *
 * @param s the inputted expression
 * @return the tokens, in the order they were read
 * @throws IOException if reading from the underlying reader fails
 */
public static String[] tokenize(String s) throws IOException
{
    StreamTokenizer lexer = new StreamTokenizer(new StringReader(s));
    lexer.parseNumbers();

    /* letters */
    lexer.wordChars('a', 'z');
    lexer.wordChars('A', 'Z');

    /* math symbols, registered as word constituents */
    lexer.wordChars(SQUARED, SQUARED);                 /* the superscript 2 */
    lexer.wordChars(PI, PI);
    lexer.wordChars(SUB.charAt(0), SUB.charAt(0));     /* subtract (takeaway) */
    lexer.wordChars(NEG.charAt(0), NEG.charAt(0));     /* negate */
    lexer.wordChars('/', '/');
    lexer.wordChars('*', '*');
    lexer.wordChars('+', '+');

    /* single-character tokens; resetting '/' last keeps it from starting a comment */
    lexer.ordinaryChar(',');
    lexer.ordinaryChar('/');

    ArrayList<String> collected = new ArrayList<>();
    for (int type = lexer.nextToken();
         type != StreamTokenizer.TT_EOF;
         type = lexer.nextToken()) {
        if (type == StreamTokenizer.TT_NUMBER) {
            collected.add(Double.toString(lexer.nval));
        } else if (type == StreamTokenizer.TT_WORD) {
            collected.add(lexer.sval);
        } else {
            /* anything else: the token type is the character itself */
            collected.add(Character.toString((char) type));
        }
    }

    return collected.toArray(new String[0]);
}
/**
 * Category of the previously processed token, tracked so the tokenizer knows
 * when an implicit multiplication sign has to be inserted.
 */
enum TokType {
    /** Start state: no token has been categorized yet. */
    FIRST,

    /** A value such as a number or a named constant. */
    OPERAND,

    /** An operator token. */
    OPERATOR,

    /** An opening parenthesis. */
    LPAREN,

    /** A closing parenthesis. */
    RPAREN
}

/**
 * Tells whether a multiplication sign belongs between the previous token and
 * an operand or opening parenthesis that follows it.
 *
 * @param tt the type of the previous token
 * @return {@code true} when the previous token was an operand or a closing
 *         parenthesis, {@code false} otherwise
 */
boolean shouldMultBeEmitted(TokType tt)
{
    if (tt == TokType.RPAREN) {
        return true;
    }
    return tt == TokType.OPERAND;
}

/**
 * Splits an expression around its operator and parenthesis characters and
 * inserts a "*" token wherever multiplication is implied.
 *
 * <p>A "*" is added in front of an operand or an opening parenthesis when the
 * previously categorized token was an operand or a closing parenthesis.
 * Tokens that match none of the known categories are passed through and do
 * not change the remembered category.
 *
 * @param in the expression to tokenize
 * @return the tokens in order, including any inserted "*" tokens
 */
public ArrayList<String> tokenize(String in)
{
    /* zero-width split: cut right after and right before each operator/paren */
    String[] pieces = in.split("(?<=[-−+*/()])|(?=[-−+*/()])");

    ArrayList<String> out = new ArrayList<>();
    TokType last = TokType.FIRST;   /* category of the previous recognized token */

    for (String piece : pieces) {
        /* classify the piece; null means "not a recognized category" */
        TokType kind;
        if (isNumeric(piece) || piece.equals("Ans") || piece.equals("π")) {
            kind = TokType.OPERAND;
        } else if (piece.equals(LPAREN)) {
            kind = TokType.LPAREN;
        } else if (piece.equals(RPAREN)) {
            kind = TokType.RPAREN;
        } else if (isOperator(piece)) {
            kind = TokType.OPERATOR;
        } else {
            kind = null;
        }

        /* only operands and opening parentheses can follow an implied "*" */
        boolean opensFactor = kind == TokType.OPERAND || kind == TokType.LPAREN;
        if (opensFactor && shouldMultBeEmitted(last)) {
            out.add("*");
        }

        if (kind != null) {
            last = kind;
        }
        out.add(piece);
    }

    return out;
}

感谢提供该正则表达式的网友。

留给读者的练习:在平方(²)运算符之后也插入乘号,这样您在问题中给出的示例才能得到正确的结果。