如何强制 ANTLR 解析所有输入的 CharStream
How to force ANTLR to parse all input CharStream
我正在使用 ANTLR4 来解析语法文件。当我使用 BaseErrorListener 检测错误时,我遇到了问题。当遇到非法输入字符串时,ANTLR 会自动匹配适当的分支,然后忽略后续的字符流,即使它包含错误。我想检测那个错误。这是我的 g4 文件和 java 文件。
TransitionLexer 是我的词法分析器文件,TransitionCondition 是我的解析器文件。ErrorDialogListener.java 是我的错误监听器,Test.java 是包含 main 方法的主 Java 文件。
TransitionLexer.g4
// Lexer rules for transition-condition expressions.
lexer grammar TransitionLexer;

// Keywords come before NAME so that they win when both match the same text.
BOOLEAN: 'true' | 'false';
IF: 'if';
THEN: 'then';
ELSE: 'else';

// Identifier: a letter, a CJK ideograph or '_', followed by any of those or digits.
NAME: (ALPHA | CHINESE | '_') (ALPHA | CHINESE | '_' | DIGIT)*;

// Numeric literal; a negative value is written in parentheses, e.g. (-3) or (-2.5).
NUMBER: INT | REAL;

// Helper rules are fragments: they only build NAME and NUMBER and never become
// tokens of their own (as plain rules they were shadowed by NAME / NUMBER anyway).
fragment ALPHA: [a-zA-Z];
fragment CHINESE: [\u4e00-\u9fa5];
fragment INT: DIGIT+
    | '(-' DIGIT+ ')';
fragment REAL: DIGIT+ ('.' DIGIT+)?
    | '(-' DIGIT+ ('.' DIGIT+)? ')';
fragment DIGIT: [0-9];

OPCOMPARE: '=' | '>=' | '<=' | '>' | '<';

// Whitespace is discarded.
WS: [ \t\n\r]+ -> skip;
// Despite its name, this rule matches (and discards) /* ... */ block comments.
SL_COMMENT: '/*' .*? '*/' -> skip;
TransitionCondition.g4
// Parser grammar for transition conditions; token rules are imported from TransitionLexer.
grammar TransitionCondition;
import TransitionLexer;
// Entry rule: one or more statements.
// NOTE(review): this rule does not end with EOF, so the parser may stop after the
// prefix it can match and leave the remaining input unread without reporting an error.
condition : stat+;
stat : expr;
// Condition expression: and/or chains, parenthesised expressions,
// a parenthesised "var OPCOMPARE value" comparison, a booleanExpr or a BOOLEAN literal.
expr: expr (('and' | 'or') expr)+
| '(' expr ')'
| '(' var OPCOMPARE value ')'
| booleanExpr
| BOOLEAN
;
// Left-hand side of a comparison: one of the three name forms below.
var: localStates
| globalStates
| connector
;
// A bare NAME.
localStates: NAME;
// 'Top' followed by one or more '.'-separated bricks and a final '.' NAME.
globalStates: 'Top' ('.' brick)+ '.' NAME;
// A single brick followed by '.' NAME.
connector: brick '.' NAME;
// Right-hand side of a comparison.
value: userdefinedValue | basicValue;
userdefinedValue: NAME;
// Arithmetic / logical value; alternatives are listed from highest to lowest precedence.
basicValue: basicValue op=('*'|'/') basicValue
| basicValue op=('+' | '-') basicValue
| basicValue ('and' | 'or') basicValue
| NUMBER | BOOLEAN
| '(' basicValue ')'
;
// Comparison or arithmetic between two NUMBER tokens, optionally nested or chained with OPCOMPARE.
booleanExpr: booleanExpr OPCOMPARE booleanExpr
| '(' booleanExpr ')'
| NUMBER (OPCOMPARE|'*'| '/'|'+'|'-') NUMBER
;
// A brick is referenced by its NAME.
brick: NAME;
ErrorDialogListener.java
package errorprocess;
import java.awt.Color;
import java.awt.Container;
import java.util.Collections;
import java.util.List;
import javax.swing.JDialog;
import javax.swing.JFrame;
import javax.swing.JLabel;
import org.antlr.v4.runtime.BaseErrorListener;
import org.antlr.v4.runtime.Parser;
import org.antlr.v4.runtime.RecognitionException;
import org.antlr.v4.runtime.Recognizer;
import org.antlr.v4.runtime.atn.ATNConfigSet;
import org.antlr.v4.runtime.dfa.DFA;
/**
 * ANTLR error listener that shows every syntax error in its own Swing dialog.
 *
 * <p>The dialog text contains the rule invocation stack (when the reporting
 * recognizer is a parser), the error position, the offending symbol and the
 * message produced by ANTLR.
 */
public class ErrorDialogListener extends BaseErrorListener {

    /**
     * Prints the DFA and its states to standard output, then delegates to the
     * base implementation.
     */
    @Override
    public void reportContextSensitivity(Parser recognizer, DFA dfa, int startIndex, int stopIndex, int prediction,
            ATNConfigSet configs) {
        System.out.println(dfa.toLexerString());
        System.out.println(dfa.getStates());
        super.reportContextSensitivity(recognizer, dfa, startIndex, stopIndex, prediction, configs);
    }

    /**
     * Builds a one-line description of the syntax error and displays it in a dialog.
     *
     * @param recognizer         the parser or lexer that detected the error
     * @param offendingSymbol    the symbol at which the error was detected
     * @param line               line number of the error
     * @param charPositionInLine character position of the error within the line
     * @param msg                error message produced by ANTLR
     * @param e                  the exception that caused the error; not used here
     */
    @Override
    public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int line, int charPositionInLine,
            String msg, RecognitionException e) {
        // Only a parser has a rule invocation stack; guard the cast so the
        // listener can also be attached to a lexer without a ClassCastException.
        List<String> stack;
        if (recognizer instanceof Parser) {
            stack = ((Parser) recognizer).getRuleInvocationStack();
            Collections.reverse(stack);
        } else {
            stack = Collections.emptyList();
        }

        StringBuilder buf = new StringBuilder();
        buf.append("rule stack: ").append(stack).append(" ");
        buf.append("line ").append(line).append(":").append(charPositionInLine)
                .append(" at ").append(offendingSymbol).append(": ").append(msg);

        JDialog dialog = new JDialog();
        Container contentPane = dialog.getContentPane();
        contentPane.add(new JLabel(buf.toString()));
        contentPane.setBackground(Color.white);
        dialog.setTitle("Syntax error");
        dialog.pack();
        dialog.setLocationRelativeTo(null);
        // Same value as the JFrame constant, but taken from the dialog type actually used.
        dialog.setDefaultCloseOperation(JDialog.DISPOSE_ON_CLOSE);
        dialog.setVisible(true);
    }
}
Test.java
package errorprocess;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import org.antlr.v4.runtime.*;
import org.antlr.v4.runtime.atn.PredictionMode;
import antlr4.my.transition.TransitionConditionLexer;
import antlr4.my.transition.TransitionConditionParser;
public class Test {
public static void main(String[] args) throws IOException {
InputStream in = new FileInputStream("G:\AltaRica\ANTLR4\test\condition\t.expr");
ANTLRInputStream input = new ANTLRInputStream(in);
TransitionConditionLexer lexer = new TransitionConditionLexer(input);
CommonTokenStream tokens = new CommonTokenStream(lexer);
TransitionConditionParser parser = new TransitionConditionParser(tokens);
parser.removeErrorListeners();
parser.addErrorListener(new ErrorDialogListener());
// parser.addErrorListener(new DiagnosticErrorListener());
// parser.getInterpreter().setPredictionMode(PredictionMode.LL_EXACT_AMBIG_DETECTION);
// parser.getInterpreter().setPredictionMode(PredictionMode.LL);
parser.condition();
}
}
主要问题
当我的输入是
(Top.b2.states = nominal) and (b1.i1 = wrong) and (states >= 5.5),解析器工作正常。
但是当我的输入是 (Top.b2.states = nominal) aaa (b1.i1 = wrong) and (states >= 5.5) 时,解析器只解析了 (Top.b2.states = nominal),并忽略了 aaa 及其之后的内容,尽管 aaa 按照语法文件是非法的。
我猜原因是解析器遵循了我在 TransitionCondition.g4 中的第一条规则的第二个分支,即 expr: '('expr')',而忽略了其他规则。那么如何强制 ANTLR 识别所有输入或如何强制 ANTLR 只选择第一个分支(expr: expr (('and' | 'or') expr)+)在这种情况下?
我试过的。
我尝试使用 DiagnosticErrorListener 或覆盖 reportContextSensitivity() 但它似乎不起作用。
您的主规则需要以 EOF 标记结尾——EOF 是 ANTLR 提供的、用于匹配输入结束的特殊标记,例如 condition : stat+ EOF;
如果没有这个标记,ANTLR 只会解析它能够匹配的部分,然后就停止。把 EOF 放在入口规则的末尾,就是告诉 ANTLR:它所解析的内容必须一直延伸到输入结束。
这个回复可能有点晚了。另一种不使用 EOF 的方法是:利用 ParserRuleContext#stop 字段来判断是否已经解析了完整的输入(input):
// Assuming we are trying to parse input
ParserRuleContext context = parser.condition();
int stopIndex = context.stop.getStopIndex();
assert input.length() - 1 == stopIndex : "Complete input not parsed, remaining part: " + input.substring(stopIndex + 1);
我正在使用 ANTLR4 来解析语法文件。当我使用 BaseErrorListener 检测错误时,我遇到了问题。当遇到非法输入字符串时,ANTLR 会自动匹配适当的分支,然后忽略后续的字符流,即使它包含错误。我想检测那个错误。这是我的 g4 文件和 java 文件。
TransitionLexer 是我的词法分析器文件,TransitionCondition 是我的解析器文件。ErrorDialogListener.java 是我的错误监听器,Test.java 是包含 main 方法的主 Java 文件。
TransitionLexer.g4
// Lexer rules for transition-condition expressions.
lexer grammar TransitionLexer;

// Keywords come before NAME so that they win when both match the same text.
BOOLEAN: 'true' | 'false';
IF: 'if';
THEN: 'then';
ELSE: 'else';

// Identifier: a letter, a CJK ideograph or '_', followed by any of those or digits.
NAME: (ALPHA | CHINESE | '_') (ALPHA | CHINESE | '_' | DIGIT)*;

// Numeric literal; a negative value is written in parentheses, e.g. (-3) or (-2.5).
NUMBER: INT | REAL;

// Helper rules are fragments: they only build NAME and NUMBER and never become
// tokens of their own (as plain rules they were shadowed by NAME / NUMBER anyway).
fragment ALPHA: [a-zA-Z];
fragment CHINESE: [\u4e00-\u9fa5];
fragment INT: DIGIT+
    | '(-' DIGIT+ ')';
fragment REAL: DIGIT+ ('.' DIGIT+)?
    | '(-' DIGIT+ ('.' DIGIT+)? ')';
fragment DIGIT: [0-9];

OPCOMPARE: '=' | '>=' | '<=' | '>' | '<';

// Whitespace is discarded.
WS: [ \t\n\r]+ -> skip;
// Despite its name, this rule matches (and discards) /* ... */ block comments.
SL_COMMENT: '/*' .*? '*/' -> skip;
TransitionCondition.g4
// Parser grammar for transition conditions; token rules are imported from TransitionLexer.
grammar TransitionCondition;
import TransitionLexer;
// Entry rule: one or more statements.
// NOTE(review): this rule does not end with EOF, so the parser may stop after the
// prefix it can match and leave the remaining input unread without reporting an error.
condition : stat+;
stat : expr;
// Condition expression: and/or chains, parenthesised expressions,
// a parenthesised "var OPCOMPARE value" comparison, a booleanExpr or a BOOLEAN literal.
expr: expr (('and' | 'or') expr)+
| '(' expr ')'
| '(' var OPCOMPARE value ')'
| booleanExpr
| BOOLEAN
;
// Left-hand side of a comparison: one of the three name forms below.
var: localStates
| globalStates
| connector
;
// A bare NAME.
localStates: NAME;
// 'Top' followed by one or more '.'-separated bricks and a final '.' NAME.
globalStates: 'Top' ('.' brick)+ '.' NAME;
// A single brick followed by '.' NAME.
connector: brick '.' NAME;
// Right-hand side of a comparison.
value: userdefinedValue | basicValue;
userdefinedValue: NAME;
// Arithmetic / logical value; alternatives are listed from highest to lowest precedence.
basicValue: basicValue op=('*'|'/') basicValue
| basicValue op=('+' | '-') basicValue
| basicValue ('and' | 'or') basicValue
| NUMBER | BOOLEAN
| '(' basicValue ')'
;
// Comparison or arithmetic between two NUMBER tokens, optionally nested or chained with OPCOMPARE.
booleanExpr: booleanExpr OPCOMPARE booleanExpr
| '(' booleanExpr ')'
| NUMBER (OPCOMPARE|'*'| '/'|'+'|'-') NUMBER
;
// A brick is referenced by its NAME.
brick: NAME;
ErrorDialogListener.java
package errorprocess;
import java.awt.Color;
import java.awt.Container;
import java.util.Collections;
import java.util.List;
import javax.swing.JDialog;
import javax.swing.JFrame;
import javax.swing.JLabel;
import org.antlr.v4.runtime.BaseErrorListener;
import org.antlr.v4.runtime.Parser;
import org.antlr.v4.runtime.RecognitionException;
import org.antlr.v4.runtime.Recognizer;
import org.antlr.v4.runtime.atn.ATNConfigSet;
import org.antlr.v4.runtime.dfa.DFA;
/**
 * ANTLR error listener that shows every syntax error in its own Swing dialog.
 *
 * <p>The dialog text contains the rule invocation stack (when the reporting
 * recognizer is a parser), the error position, the offending symbol and the
 * message produced by ANTLR.
 */
public class ErrorDialogListener extends BaseErrorListener {

    /**
     * Prints the DFA and its states to standard output, then delegates to the
     * base implementation.
     */
    @Override
    public void reportContextSensitivity(Parser recognizer, DFA dfa, int startIndex, int stopIndex, int prediction,
            ATNConfigSet configs) {
        System.out.println(dfa.toLexerString());
        System.out.println(dfa.getStates());
        super.reportContextSensitivity(recognizer, dfa, startIndex, stopIndex, prediction, configs);
    }

    /**
     * Builds a one-line description of the syntax error and displays it in a dialog.
     *
     * @param recognizer         the parser or lexer that detected the error
     * @param offendingSymbol    the symbol at which the error was detected
     * @param line               line number of the error
     * @param charPositionInLine character position of the error within the line
     * @param msg                error message produced by ANTLR
     * @param e                  the exception that caused the error; not used here
     */
    @Override
    public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int line, int charPositionInLine,
            String msg, RecognitionException e) {
        // Only a parser has a rule invocation stack; guard the cast so the
        // listener can also be attached to a lexer without a ClassCastException.
        List<String> stack;
        if (recognizer instanceof Parser) {
            stack = ((Parser) recognizer).getRuleInvocationStack();
            Collections.reverse(stack);
        } else {
            stack = Collections.emptyList();
        }

        StringBuilder buf = new StringBuilder();
        buf.append("rule stack: ").append(stack).append(" ");
        buf.append("line ").append(line).append(":").append(charPositionInLine)
                .append(" at ").append(offendingSymbol).append(": ").append(msg);

        JDialog dialog = new JDialog();
        Container contentPane = dialog.getContentPane();
        contentPane.add(new JLabel(buf.toString()));
        contentPane.setBackground(Color.white);
        dialog.setTitle("Syntax error");
        dialog.pack();
        dialog.setLocationRelativeTo(null);
        // Same value as the JFrame constant, but taken from the dialog type actually used.
        dialog.setDefaultCloseOperation(JDialog.DISPOSE_ON_CLOSE);
        dialog.setVisible(true);
    }
}
Test.java
package errorprocess;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import org.antlr.v4.runtime.*;
import org.antlr.v4.runtime.atn.PredictionMode;
import antlr4.my.transition.TransitionConditionLexer;
import antlr4.my.transition.TransitionConditionParser;
public class Test {
public static void main(String[] args) throws IOException {
InputStream in = new FileInputStream("G:\AltaRica\ANTLR4\test\condition\t.expr");
ANTLRInputStream input = new ANTLRInputStream(in);
TransitionConditionLexer lexer = new TransitionConditionLexer(input);
CommonTokenStream tokens = new CommonTokenStream(lexer);
TransitionConditionParser parser = new TransitionConditionParser(tokens);
parser.removeErrorListeners();
parser.addErrorListener(new ErrorDialogListener());
// parser.addErrorListener(new DiagnosticErrorListener());
// parser.getInterpreter().setPredictionMode(PredictionMode.LL_EXACT_AMBIG_DETECTION);
// parser.getInterpreter().setPredictionMode(PredictionMode.LL);
parser.condition();
}
}
主要问题
当我的输入是
(Top.b2.states = nominal) and (b1.i1 = wrong) and (states >= 5.5),解析器工作正常。
但是当我的输入是 (Top.b2.states = nominal) aaa (b1.i1 = wrong) and (states >= 5.5) 时,解析器只解析了 (Top.b2.states = nominal),并忽略了 aaa 及其之后的内容,尽管 aaa 按照语法文件是非法的。
我猜原因是解析器遵循了我在 TransitionCondition.g4 中的第一条规则的第二个分支,即 expr: '('expr')',而忽略了其他规则。那么如何强制 ANTLR 识别所有输入或如何强制 ANTLR 只选择第一个分支(expr: expr (('and' | 'or') expr)+)在这种情况下?
我试过的。
我尝试使用 DiagnosticErrorListener 或覆盖 reportContextSensitivity() 但它似乎不起作用。
您的主规则需要以 EOF 标记结尾——EOF 是 ANTLR 提供的、用于匹配输入结束的特殊标记,例如 condition : stat+ EOF;
如果没有这个标记,ANTLR 只会解析它能够匹配的部分,然后就停止。把 EOF 放在入口规则的末尾,就是告诉 ANTLR:它所解析的内容必须一直延伸到输入结束。
这个回复可能有点晚了。另一种不使用 EOF 的方法是:利用 ParserRuleContext#stop 字段来判断是否已经解析了完整的输入(input):
// Assuming we are trying to parse input
ParserRuleContext context = parser.condition();
int stopIndex = context.stop.getStopIndex();
assert input.length() - 1 == stopIndex : "Complete input not parsed, remaining part: " + input.substring(stopIndex + 1);