如何访问 Stanford Parser 返回的依存树和成分树(constituency tree)中的各个节点?

How do I access individual nodes in the dependency tree and constituency tree returned by the Stanford Parser?

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

using java.io;
using edu.stanford.nlp.process;
using edu.stanford.nlp.ling;
using edu.stanford.nlp.trees;
using edu.stanford.nlp.parser.lexparser;
using Console = System.Console;

namespace Parser
{   

    /// <summary>
    /// Demo program: parses one English sentence with the Stanford lexicalized parser and
    /// prints its constituency tree and its typed dependencies.
    /// </summary>
    class Parser
    {
        /// <summary>
        /// Loads the English PCFG model from the extracted models directory.
        /// </summary>
        /// <returns>The parser returned by <c>LexicalizedParser.loadModel</c>.</returns>
        private static LexicalizedParser LoadLexicalizedParser()
        {
            // Path to models extracted from `stanford-parser-3.5.2-models.jar`
            var jarRoot = @"E:\Project\stanford-parser-full-2015-04-20\stanford-parser-3.5.2-models";
            var modelsDirectory = jarRoot + @"\edu\stanford\nlp\models";

            // Loading english PCFG parser from file
            return LexicalizedParser.loadModel(modelsDirectory + @"\lexparser\englishPCFG.ser.gz");
        }

        /// <summary>
        /// Tokenizes <paramref name="sentence"/> and parses it into a lexical tree.
        /// </summary>
        /// <param name="lp">A loaded lexicalized parser.</param>
        /// <param name="sentence">Raw, untokenized sentence text.</param>
        /// <returns>The parse tree produced by <paramref name="lp"/>.</returns>
        private static Tree GetLexicalTree(LexicalizedParser lp, string sentence)
        {
            // Splitting on ' ' would leave punctuation glued to the neighbouring word
            // ("sentence." as a single token), so let the Penn Treebank tokenizer
            // produce the tokens instead.
            var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
            var reader = new java.io.StringReader(sentence);
            try
            {
                var rawWords = tokenizerFactory.getTokenizer(reader).tokenize();
                return lp.apply(rawWords);
            }
            finally
            {
                reader.close();
            }
        }

        /// <summary>
        /// Renders the constituency tree of <paramref name="tree"/> as a string.
        /// </summary>
        /// <param name="tree">The lexical tree returned by the parser.</param>
        /// <returns>The result of <c>tree.pennString()</c>.</returns>
        private static string GetConstituencyTree(Tree tree)
        {
            return tree.pennString();
        }

        /// <summary>
        /// Renders the typed dependencies of <paramref name="tree"/> as a string.
        /// </summary>
        /// <param name="tree">The lexical tree returned by the parser.</param>
        /// <returns>One dependency per line, each line terminated by "\n".</returns>
        private static string GetDependencyTree(Tree tree)
        {
            // Extract dependencies from lexical tree
            var tlp = new PennTreebankLanguagePack();
            var gsf = tlp.grammaticalStructureFactory();
            var gs = gsf.newGrammaticalStructure(tree);
            var tdl = gs.typedDependenciesCCprocessed();

            // StringBuilder avoids re-allocating the whole string on every iteration.
            var dependencyTree = new StringBuilder();

            for (int i = 0; i < tdl.size(); ++i)
            {
                dependencyTree.Append(tdl.get(i)).Append("\n");
            }

            return dependencyTree.ToString();
        }

        /// <summary>
        /// Entry point: parses a sample sentence and writes both tree renderings to the console.
        /// </summary>
        static void Main()
        {
            var lp = LoadLexicalizedParser();
            string sentence = "This is an easy sentence.";

            Tree tree = GetLexicalTree(lp, sentence);

            string constituencyTree = GetConstituencyTree(tree);
            string dependencyTree = GetDependencyTree(tree);

            Console.WriteLine("Constituency Tree\n" + constituencyTree);
            Console.WriteLine("Dependency Tree\n" + dependencyTree);

            //// Extract collapsed dependencies from parsed tree
            //var tp = new TreePrint("penn,typedDependenciesCollapsed");
            //tp.printTree(tree);
        }
    }
}

在此代码中,我将成分树(constituency tree)和依存关系树作为字符串获取。但是我想直接以 'Tree' 类型本身来使用它们,也就是说,我想访问并操作变量 'tree' 的节点。有什么办法可以做到吗?还是说我必须创建自己的树数据结构,并通过处理字符串('constituencyTree' 和 'dependencyTree')来获取各个节点?

[我目前正在做的一个小项目需要这个。]

是的,有多种现成的数据结构可用于成分树(constituency tree)和依存树。对于成分树,您需要使用 Tree 数据结构,它内置了许多有用的方法,可用于遍历树、获取所有终端节点等。

对于依存关系树,您可以使用 TypedDependency 对象的列表,其中每个 TypedDependency 表示支配词(governor)与从属词(dependent)之间的关系;也可以使用 SemanticGraph。要将您在示例中命名为 tdl 的 TypedDependency 列表转换为 SemanticGraph,只需将该列表传递给构造函数:

// `tdl` is the list returned by gs.typedDependenciesCCprocessed() in GetDependencyTree.
// NOTE(review): SemanticGraph presumably needs `using edu.stanford.nlp.semgraph;` — confirm.
SemanticGraph sg = new SemanticGraph(tdl);