antlr4/java: 漂亮地将解析树打印到标准输出
antlr4/java: pretty print parse tree to stdout
初学者问题:如何将解析树的可读版本打印到标准输出?
// Load the input file as a character stream.
CharStream input = CharStreams.fromFileName("testdata/test.txt");
// Lexer -> token stream -> parser pipeline over that input.
MyLexer lexer = new MyLexer(input);
CommonTokenStream tokens = new CommonTokenStream(lexer);
MyParser parser = new MyParser(tokens);
// Ask the parser to build a parse tree while parsing.
parser.setBuildParseTree(true);
// Invoke the grammar's `record` rule; the returned context is the tree to print.
RuleContext tree = parser.record();
// Print the tree in its single-line, parenthesized text form.
System.out.println(tree.toStringTree(parser));
这会将整棵树打印在一行中,各节点由圆括号“()”分隔。
(record (husband <4601> (name KOHAI Nikolaus) \n (birth * um.1872 (place Ploschitz)) \n\n) (wife (marriage oo) \n (name SCHLOTTHAUER Maria) \n (birth * um.1877
...
我想要这样的东西
record
husband
<id>
name
<name>
...
wife
以下是从 SnippetsTest 中提取出来、可作为独立工具使用的类:
import java.util.List;
import org.antlr.v4.runtime.misc.Utils;
import org.antlr.v4.runtime.tree.Tree;
import org.antlr.v4.runtime.tree.Trees;
/**
 * Static helpers for rendering an ANTLR parse tree as indented, multi-line text.
 *
 * <p>The class holds no mutable state, so concurrent or nested calls do not interfere with each
 * other.
 */
public class TreeUtils {

  /** Platform dependent end-of-line marker */
  public static final String Eol = System.lineSeparator();

  /** The literal indent char(s) used for pretty-printing */
  public static final String Indents = " ";

  private TreeUtils() {}

  /**
   * Pretty print out a whole tree. {@link Trees#getNodeText} is used on the node payloads to get
   * the text for the nodes. (Derived from Trees.toStringTree(....))
   *
   * @param t root of the (sub)tree to print
   * @param ruleNames rule names, passed through to {@link Trees#getNodeText}
   * @return the tree text, with every node that has children starting on its own indented line
   */
  public static String toPrettyTree(final Tree t, final List<String> ruleNames) {
    return process(t, ruleNames, 0)
        // Blank out whitespace-only lines left behind by closing indents. The backslash must be
        // doubled: a bare \s in a Java string literal is either an illegal escape (before
        // Java 15) or a plain space (Java 15+), never the regex whitespace class.
        .replaceAll("(?m)^\\s+$", "")
        // Collapse the resulting empty line into a single line break.
        .replaceAll("\r?\n\r?\n", Eol);
  }

  /**
   * Recursively renders {@code t}.
   *
   * <p>A node without children yields just its escaped text. A node with children yields a line
   * break plus indent, its text and a space, all children rendered one level deeper, and a
   * trailing line break plus indent back at this node's own level.
   *
   * @param level nesting depth of {@code t}; passed explicitly instead of being kept in a static
   *     field so the method is reentrant
   */
  private static String process(final Tree t, final List<String> ruleNames, final int level) {
    final String text = Utils.escapeWhitespace(Trees.getNodeText(t, ruleNames), false);
    if (t.getChildCount() == 0) {
      return text;
    }
    final StringBuilder sb = new StringBuilder();
    sb.append(lead(level)).append(text).append(' ');
    for (int i = 0; i < t.getChildCount(); i++) {
      sb.append(process(t.getChild(i), ruleNames, level + 1));
    }
    sb.append(lead(level));
    return sb.toString();
  }

  /**
   * Builds the prefix that starts a new line at the given depth: an end-of-line marker followed
   * by {@code level} copies of {@link #Indents}. Depth 0 yields the empty string, so the root
   * never starts with a line break.
   */
  private static String lead(final int level) {
    final StringBuilder sb = new StringBuilder();
    if (level > 0) {
      sb.append(Eol);
      for (int cnt = 0; cnt < level; cnt++) {
        sb.append(Indents);
      }
    }
    return sb.toString();
  }
}
调用方法如下:
// toPrettyTree takes the rule names as a List, so wrap the parser's rule-name array.
List<String> ruleNamesList = Arrays.asList(parser.getRuleNames());
// Build the indented, multi-line rendering of the tree.
String prettyTree = TreeUtils.toPrettyTree(tree, ruleNamesList);
除了图形化的解析树之外,我的 ANTLR4 Visual Studio Code 扩展(ANTLR4 extension for Visual Studio Code)还能生成格式化的文本解析树:
如果你希望只在真正需要的地方使用正则表达式,也可以自己动手打印这棵树:
import org.antlr.v4.runtime.Parser;
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.tree.ParseTree;
import org.antlr.v4.runtime.tree.Trees;
/**
 * Renders the parse tree rooted at {@code root} as text, one node per line, using the rule
 * names of {@code parser} to label rule nodes.
 *
 * @param parser parser whose rule names are used for labelling
 * @param root root of the tree to render
 * @return the rendered tree
 */
public static String printSyntaxTree(Parser parser, ParseTree root) {
  StringBuilder rendered = new StringBuilder();
  List<String> names = Arrays.asList(parser.getRuleNames());
  // The root is rendered at depth 0; the helper handles all descendants.
  recursive(root, rendered, 0, names);
  return rendered.toString();
}
/**
 * Appends one line for {@code aRoot} to {@code buf}, prefixed by {@code offset} indent units,
 * then does the same for each of its children one level deeper.
 *
 * <p>Only {@link ParserRuleContext} nodes with a non-null child list are descended into.
 */
private static void recursive(ParseTree aRoot, StringBuilder buf, int offset, List<String> ruleNames) {
  int pending = offset;
  while (pending-- > 0) {
    buf.append(" ");
  }
  buf.append(Trees.getNodeText(aRoot, ruleNames));
  buf.append("\n");

  if (!(aRoot instanceof ParserRuleContext)) {
    return;
  }
  List<ParseTree> kids = ((ParserRuleContext) aRoot).children;
  if (kids == null) {
    return;
  }
  for (ParseTree kid : kids) {
    recursive(kid, buf, offset + 1, ruleNames);
  }
}
用法:
// Parse starting from one of your grammar's rules (yourOwnRule is presumably a placeholder name).
ParseTree root = parser.yourOwnRule();
// Render the tree, one node per line, and print it.
System.out.println(printSyntaxTree(parser, root));
我想分享一下自己的做法,它利用了我的项目中已经在使用 StringTemplate 这一事实。这意味着我不必像其他答案那样手动处理缩进层级,同时也使输出格式更易于自定义。
最重要的是——这也是我发布这个答案的主要原因——我决定不打印那些只是“透传”(passing through)的规则,即在使用如下链式规则时:
a : b | something_else ;
b : c | another ;
c : d | yet_more ;
d : rule that matters ;
因为在检查由小输入生成的树时,这些规则不提供任何有用信息,只会让我的输出变得杂乱。这一点也很容易修改,只需改动 //pass-through rules
这条注释所在的位置。
我还复制了 Trees.getNodeText
的定义,并将其修改为使用普通数组,从而省去不必要的包装;如果需要,我还可以对它做进一步的自定义。
最后,我让它获取解析器和树并直接转储到 System.out,因为这是我唯一需要它的情况。
import org.antlr.v4.runtime.Parser;
import org.antlr.v4.runtime.RuleContext;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.tree.ErrorNode;
import org.antlr.v4.runtime.tree.TerminalNode;
import org.antlr.v4.runtime.tree.Tree;
import org.stringtemplate.v4.ST;
//for pretty-dumping trees in short form
/**
 * Dumps a parse tree to standard output in a condensed form, using StringTemplate for layout.
 * Nodes with exactly one child are not printed themselves; their child is printed in their
 * place.
 */
public class TreeUtils {

  /** Prints the condensed rendering of {@code tree}, labelled with {@code parser}'s rule names. */
  public static void dump(Parser parser, Tree tree) {
    ST rendered = process(parser.getRuleNames(), tree);
    System.out.println(rendered.render());
  }

  /** Builds the template tree for {@code t}, skipping nodes that have exactly one child. */
  private static ST process(String[] ruleNames, Tree t) {
    final int childCount = t.getChildCount();
    if (childCount == 0) {
      return literal(getNodeText(t, ruleNames));
    }
    if (childCount == 1) {
      //pass-through rules
      return process(ruleNames, t.getChild(0));
    }
    ST node = template().add("rule_text", getNodeText(t, ruleNames));
    for (int idx = 0; idx < childCount; idx++) {
      node.add("child", process(ruleNames, t.getChild(idx)));
    }
    return node;
  }

  /** Creates a fresh template for a node that has several children. */
  private static ST template() {
    return new ST("<rule_text>\n\t<child; separator=\"\n\">");
  }

  /** Wraps plain text in a template so it is emitted verbatim as an attribute value. */
  private static ST literal(String text) {
    ST leaf = new ST("<text>");
    return leaf.add("text", text);
  }

  /**
   * Returns the label for {@code t}: the rule name for rule contexts, {@code toString()} for
   * error nodes, the token text for terminals, and otherwise the payload's text.
   */
  private static String getNodeText(Tree t, String[] ruleNames) {
    if (t instanceof RuleContext) {
      RuleContext ctx = ((RuleContext) t).getRuleContext();
      return ruleNames[ctx.getRuleIndex()];
    }
    if (t instanceof ErrorNode) {
      return t.toString();
    }
    if (t instanceof TerminalNode) {
      Token symbol = ((TerminalNode) t).getSymbol();
      if (symbol != null) {
        return symbol.getText();
      }
      // A terminal without a symbol falls through to the payload handling below.
    }
    Object payload = t.getPayload();
    return (payload instanceof Token)
        ? ((Token) payload).getText()
        : t.getPayload().toString();
  }
}
对于Kotlin,可以使用这个扩展函数
/**
 * Renders this tree as indented text. A node without children is a single line; a node with
 * children is followed by " (", one line per child at the next indent level, and a closing ")"
 * at this node's own indent.
 */
fun Tree.format(parser: Parser, indent: Int = 0): String {
    val pad = " ".repeat(indent)
    val out = StringBuilder(pad).append(Trees.getNodeText(this, parser))
    if (childCount == 0) {
        return out.toString()
    }
    out.append(" (\n")
    repeat(childCount) { index ->
        out.append(getChild(index).format(parser, indent + 1)).append("\n")
    }
    return out.append(pad).append(")").toString()
}
初学者问题:如何将解析树的可读版本打印到标准输出?
// Load the input file as a character stream.
CharStream input = CharStreams.fromFileName("testdata/test.txt");
// Lexer -> token stream -> parser pipeline over that input.
MyLexer lexer = new MyLexer(input);
CommonTokenStream tokens = new CommonTokenStream(lexer);
MyParser parser = new MyParser(tokens);
// Ask the parser to build a parse tree while parsing.
parser.setBuildParseTree(true);
// Invoke the grammar's `record` rule; the returned context is the tree to print.
RuleContext tree = parser.record();
// Print the tree in its single-line, parenthesized text form.
System.out.println(tree.toStringTree(parser));
这会将整棵树打印在一行中,各节点由圆括号“()”分隔。
(record (husband <4601> (name KOHAI Nikolaus) \n (birth * um.1872 (place Ploschitz)) \n\n) (wife (marriage oo) \n (name SCHLOTTHAUER Maria) \n (birth * um.1877
...
我想要这样的东西
record
husband
<id>
name
<name>
...
wife
以下是从 SnippetsTest 中提取出来、可作为独立工具使用的类:
import java.util.List;
import org.antlr.v4.runtime.misc.Utils;
import org.antlr.v4.runtime.tree.Tree;
import org.antlr.v4.runtime.tree.Trees;
/**
 * Static helpers for rendering an ANTLR parse tree as indented, multi-line text.
 *
 * <p>The class holds no mutable state, so concurrent or nested calls do not interfere with each
 * other.
 */
public class TreeUtils {

  /** Platform dependent end-of-line marker */
  public static final String Eol = System.lineSeparator();

  /** The literal indent char(s) used for pretty-printing */
  public static final String Indents = " ";

  private TreeUtils() {}

  /**
   * Pretty print out a whole tree. {@link Trees#getNodeText} is used on the node payloads to get
   * the text for the nodes. (Derived from Trees.toStringTree(....))
   *
   * @param t root of the (sub)tree to print
   * @param ruleNames rule names, passed through to {@link Trees#getNodeText}
   * @return the tree text, with every node that has children starting on its own indented line
   */
  public static String toPrettyTree(final Tree t, final List<String> ruleNames) {
    return process(t, ruleNames, 0)
        // Blank out whitespace-only lines left behind by closing indents. The backslash must be
        // doubled: a bare \s in a Java string literal is either an illegal escape (before
        // Java 15) or a plain space (Java 15+), never the regex whitespace class.
        .replaceAll("(?m)^\\s+$", "")
        // Collapse the resulting empty line into a single line break.
        .replaceAll("\r?\n\r?\n", Eol);
  }

  /**
   * Recursively renders {@code t}.
   *
   * <p>A node without children yields just its escaped text. A node with children yields a line
   * break plus indent, its text and a space, all children rendered one level deeper, and a
   * trailing line break plus indent back at this node's own level.
   *
   * @param level nesting depth of {@code t}; passed explicitly instead of being kept in a static
   *     field so the method is reentrant
   */
  private static String process(final Tree t, final List<String> ruleNames, final int level) {
    final String text = Utils.escapeWhitespace(Trees.getNodeText(t, ruleNames), false);
    if (t.getChildCount() == 0) {
      return text;
    }
    final StringBuilder sb = new StringBuilder();
    sb.append(lead(level)).append(text).append(' ');
    for (int i = 0; i < t.getChildCount(); i++) {
      sb.append(process(t.getChild(i), ruleNames, level + 1));
    }
    sb.append(lead(level));
    return sb.toString();
  }

  /**
   * Builds the prefix that starts a new line at the given depth: an end-of-line marker followed
   * by {@code level} copies of {@link #Indents}. Depth 0 yields the empty string, so the root
   * never starts with a line break.
   */
  private static String lead(final int level) {
    final StringBuilder sb = new StringBuilder();
    if (level > 0) {
      sb.append(Eol);
      for (int cnt = 0; cnt < level; cnt++) {
        sb.append(Indents);
      }
    }
    return sb.toString();
  }
}
调用方法如下:
// toPrettyTree takes the rule names as a List, so wrap the parser's rule-name array.
List<String> ruleNamesList = Arrays.asList(parser.getRuleNames());
// Build the indented, multi-line rendering of the tree.
String prettyTree = TreeUtils.toPrettyTree(tree, ruleNamesList);
除了图形化的解析树之外,我的 ANTLR4 Visual Studio Code 扩展(ANTLR4 extension for Visual Studio Code)还能生成格式化的文本解析树:
如果你希望只在真正需要的地方使用正则表达式,也可以自己动手打印这棵树:
import org.antlr.v4.runtime.Parser;
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.tree.ParseTree;
import org.antlr.v4.runtime.tree.Trees;
/**
 * Renders the parse tree rooted at {@code root} as text, one node per line, using the rule
 * names of {@code parser} to label rule nodes.
 *
 * @param parser parser whose rule names are used for labelling
 * @param root root of the tree to render
 * @return the rendered tree
 */
public static String printSyntaxTree(Parser parser, ParseTree root) {
  StringBuilder rendered = new StringBuilder();
  List<String> names = Arrays.asList(parser.getRuleNames());
  // The root is rendered at depth 0; the helper handles all descendants.
  recursive(root, rendered, 0, names);
  return rendered.toString();
}
/**
 * Appends one line for {@code aRoot} to {@code buf}, prefixed by {@code offset} indent units,
 * then does the same for each of its children one level deeper.
 *
 * <p>Only {@link ParserRuleContext} nodes with a non-null child list are descended into.
 */
private static void recursive(ParseTree aRoot, StringBuilder buf, int offset, List<String> ruleNames) {
  int pending = offset;
  while (pending-- > 0) {
    buf.append(" ");
  }
  buf.append(Trees.getNodeText(aRoot, ruleNames));
  buf.append("\n");

  if (!(aRoot instanceof ParserRuleContext)) {
    return;
  }
  List<ParseTree> kids = ((ParserRuleContext) aRoot).children;
  if (kids == null) {
    return;
  }
  for (ParseTree kid : kids) {
    recursive(kid, buf, offset + 1, ruleNames);
  }
}
用法:
// Parse starting from one of your grammar's rules (yourOwnRule is presumably a placeholder name).
ParseTree root = parser.yourOwnRule();
// Render the tree, one node per line, and print it.
System.out.println(printSyntaxTree(parser, root));
我想分享一下自己的做法,它利用了我的项目中已经在使用 StringTemplate 这一事实。这意味着我不必像其他答案那样手动处理缩进层级,同时也使输出格式更易于自定义。
最重要的是——这也是我发布这个答案的主要原因——我决定不打印那些只是“透传”(passing through)的规则,即在使用如下链式规则时:
a : b | something_else ;
b : c | another ;
c : d | yet_more ;
d : rule that matters ;
因为在检查由小输入生成的树时,这些规则不提供任何有用信息,只会让我的输出变得杂乱。这一点也很容易修改,只需改动 //pass-through rules
这条注释所在的位置。
我还复制了 Trees.getNodeText
的定义,并将其修改为使用普通数组,从而省去不必要的包装;如果需要,我还可以对它做进一步的自定义。
最后,我让它获取解析器和树并直接转储到 System.out,因为这是我唯一需要它的情况。
import org.antlr.v4.runtime.Parser;
import org.antlr.v4.runtime.RuleContext;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.tree.ErrorNode;
import org.antlr.v4.runtime.tree.TerminalNode;
import org.antlr.v4.runtime.tree.Tree;
import org.stringtemplate.v4.ST;
//for pretty-dumping trees in short form
/**
 * Dumps a parse tree to standard output in a condensed form, using StringTemplate for layout.
 * Nodes with exactly one child are not printed themselves; their child is printed in their
 * place.
 */
public class TreeUtils {

  /** Prints the condensed rendering of {@code tree}, labelled with {@code parser}'s rule names. */
  public static void dump(Parser parser, Tree tree) {
    ST rendered = process(parser.getRuleNames(), tree);
    System.out.println(rendered.render());
  }

  /** Builds the template tree for {@code t}, skipping nodes that have exactly one child. */
  private static ST process(String[] ruleNames, Tree t) {
    final int childCount = t.getChildCount();
    if (childCount == 0) {
      return literal(getNodeText(t, ruleNames));
    }
    if (childCount == 1) {
      //pass-through rules
      return process(ruleNames, t.getChild(0));
    }
    ST node = template().add("rule_text", getNodeText(t, ruleNames));
    for (int idx = 0; idx < childCount; idx++) {
      node.add("child", process(ruleNames, t.getChild(idx)));
    }
    return node;
  }

  /** Creates a fresh template for a node that has several children. */
  private static ST template() {
    return new ST("<rule_text>\n\t<child; separator=\"\n\">");
  }

  /** Wraps plain text in a template so it is emitted verbatim as an attribute value. */
  private static ST literal(String text) {
    ST leaf = new ST("<text>");
    return leaf.add("text", text);
  }

  /**
   * Returns the label for {@code t}: the rule name for rule contexts, {@code toString()} for
   * error nodes, the token text for terminals, and otherwise the payload's text.
   */
  private static String getNodeText(Tree t, String[] ruleNames) {
    if (t instanceof RuleContext) {
      RuleContext ctx = ((RuleContext) t).getRuleContext();
      return ruleNames[ctx.getRuleIndex()];
    }
    if (t instanceof ErrorNode) {
      return t.toString();
    }
    if (t instanceof TerminalNode) {
      Token symbol = ((TerminalNode) t).getSymbol();
      if (symbol != null) {
        return symbol.getText();
      }
      // A terminal without a symbol falls through to the payload handling below.
    }
    Object payload = t.getPayload();
    return (payload instanceof Token)
        ? ((Token) payload).getText()
        : t.getPayload().toString();
  }
}
对于Kotlin,可以使用这个扩展函数
/**
 * Renders this tree as indented text. A node without children is a single line; a node with
 * children is followed by " (", one line per child at the next indent level, and a closing ")"
 * at this node's own indent.
 */
fun Tree.format(parser: Parser, indent: Int = 0): String {
    val pad = " ".repeat(indent)
    val out = StringBuilder(pad).append(Trees.getNodeText(this, parser))
    if (childCount == 0) {
        return out.toString()
    }
    out.append(" (\n")
    repeat(childCount) { index ->
        out.append(getChild(index).format(parser, indent + 1)).append("\n")
    }
    return out.append(pad).append(")").toString()
}