如何访问 Stanford Parser 返回的依存树(dependency tree)和成分句法树(constituency tree)中的各个节点?
How do I access individual nodes in the dependency tree and constituency tree returned by the Stanford Parser?
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using java.io;
using edu.stanford.nlp.process;
using edu.stanford.nlp.ling;
using edu.stanford.nlp.trees;
using edu.stanford.nlp.parser.lexparser;
using Console = System.Console;
namespace Parser
{
    /// <summary>
    /// Console sample: parses one sentence with the Stanford lexicalized parser and prints
    /// its constituency tree (Penn string) and its typed dependencies.
    /// </summary>
    class Parser
    {
        /// <summary>
        /// Loads the English PCFG lexicalized parser model from the extracted models directory.
        /// </summary>
        /// <returns>The parser returned by <c>LexicalizedParser.loadModel</c>.</returns>
        private static LexicalizedParser LoadLexicalizedParser()
        {
            // Path to models extracted from `stanford-parser-3.5.2-models.jar`
            var jarRoot = @"E:\Project\stanford-parser-full-2015-04-20\stanford-parser-3.5.2-models";
            var modelsDirectory = jarRoot + @"\edu\stanford\nlp\models";

            // Loading english PCFG parser from file
            return LexicalizedParser.loadModel(modelsDirectory + @"\lexparser\englishPCFG.ser.gz");
        }

        /// <summary>
        /// Parses a space-separated sentence and returns the resulting tree.
        /// </summary>
        /// <param name="lp">Parser to apply to the token list.</param>
        /// <param name="sentence">Sentence whose tokens are separated by spaces.</param>
        /// <returns>The tree produced by <c>lp.apply</c> for the tokens.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="lp"/> or <paramref name="sentence"/> is null.
        /// </exception>
        private static Tree GetLexicalTree(LexicalizedParser lp, string sentence)
        {
            if (lp == null)
            {
                throw new ArgumentNullException("lp");
            }

            if (sentence == null)
            {
                throw new ArgumentNullException("sentence");
            }

            // Drop empty entries: with a plain Split(' '), repeated, leading or trailing
            // spaces would yield empty strings that get handed to the parser as words.
            string[] words = sentence.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

            // This sample shows parsing a list of correctly tokenized words
            var rawWords = Sentence.toCoreLabelList(words);
            var tree = lp.apply(rawWords);
            return tree;
        }

        /// <summary>
        /// Renders the tree with <c>Tree.pennString()</c>.
        /// </summary>
        /// <param name="tree">Tree to render.</param>
        /// <returns>The string returned by <c>pennString()</c>.</returns>
        private static string GetConstituencyTree(Tree tree)
        {
            return tree.pennString();
        }

        /// <summary>
        /// Extracts the typed dependencies of the tree (via
        /// <c>typedDependenciesCCprocessed()</c>) and renders them one per line.
        /// </summary>
        /// <param name="tree">Tree to extract the dependencies from.</param>
        /// <returns>
        /// Each dependency's string form followed by "\n"; empty when there are none.
        /// </returns>
        private static string GetDependencyTree(Tree tree)
        {
            // Extract dependencies from lexical tree
            var tlp = new PennTreebankLanguagePack();
            var gsf = tlp.grammaticalStructureFactory();
            var gs = gsf.newGrammaticalStructure(tree);
            var tdl = gs.typedDependenciesCCprocessed();

            // StringBuilder avoids re-allocating the whole string on every iteration.
            var dependencyTree = new StringBuilder();
            int count = tdl.size();
            for (int i = 0; i < count; ++i)
            {
                dependencyTree.Append(tdl.get(i)).Append("\n");
            }

            return dependencyTree.ToString();
        }

        /// <summary>
        /// Entry point: parses a fixed sample sentence and writes both renderings to the console.
        /// </summary>
        static void Main()
        {
            var lp = LoadLexicalizedParser();
            string sentence = "This is an easy sentence.";
            Tree tree = GetLexicalTree(lp, sentence);
            string constituencyTree = GetConstituencyTree(tree);
            string dependencyTree = GetDependencyTree(tree);
            Console.WriteLine("Constituency Tree\n" + constituencyTree);
            Console.WriteLine("Dependency Tree\n" + dependencyTree);

            // Alternative (disabled): print collapsed dependencies straight from the parsed tree.
            //var tp = new TreePrint("penn,typedDependenciesCollapsed");
            //tp.printTree(tree);
        }
    }
}
在此代码中,我将成分句法树(constituency tree)和依存树(dependency tree)作为字符串获取。
但是我希望直接通过 'Tree' 类型本身来使用它们,也就是说,我想访问并操作变量 'tree' 的各个节点。有什么办法可以做到吗?
或者我是否必须创建自己的树数据结构并通过处理字符串('constituencyTree' & 'dependencyTree')来获取各个节点?
[我目前正在做的一个小项目需要这个。]
是的,有许多现成的数据结构可用于成分句法树和依存树。对于成分句法树,您需要使用 Tree 数据结构,
它具有许多有用的内置函数,可用于遍历树、获取所有终端节点等。
对于依存树,您既可以使用 TypedDependency 对象的列表(其中每个 TypedDependency
表示一个支配词(governor)与一个从属词(dependent)之间的关系),也可以使用 SemanticGraph。
要将您在示例中命名为 tdl 的 TypedDependency 列表转换为 SemanticGraph,
只需将该列表传递给构造函数:
SemanticGraph sg = new SemanticGraph(tdl);
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using java.io;
using edu.stanford.nlp.process;
using edu.stanford.nlp.ling;
using edu.stanford.nlp.trees;
using edu.stanford.nlp.parser.lexparser;
using Console = System.Console;
namespace Parser
{
    /// <summary>
    /// Console sample: parses one sentence with the Stanford lexicalized parser and prints
    /// its constituency tree (Penn string) and its typed dependencies.
    /// </summary>
    class Parser
    {
        /// <summary>
        /// Loads the English PCFG lexicalized parser model from the extracted models directory.
        /// </summary>
        /// <returns>The parser returned by <c>LexicalizedParser.loadModel</c>.</returns>
        private static LexicalizedParser LoadLexicalizedParser()
        {
            // Path to models extracted from `stanford-parser-3.5.2-models.jar`
            var jarRoot = @"E:\Project\stanford-parser-full-2015-04-20\stanford-parser-3.5.2-models";
            var modelsDirectory = jarRoot + @"\edu\stanford\nlp\models";

            // Loading english PCFG parser from file
            return LexicalizedParser.loadModel(modelsDirectory + @"\lexparser\englishPCFG.ser.gz");
        }

        /// <summary>
        /// Parses a space-separated sentence and returns the resulting tree.
        /// </summary>
        /// <param name="lp">Parser to apply to the token list.</param>
        /// <param name="sentence">Sentence whose tokens are separated by spaces.</param>
        /// <returns>The tree produced by <c>lp.apply</c> for the tokens.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="lp"/> or <paramref name="sentence"/> is null.
        /// </exception>
        private static Tree GetLexicalTree(LexicalizedParser lp, string sentence)
        {
            if (lp == null)
            {
                throw new ArgumentNullException("lp");
            }

            if (sentence == null)
            {
                throw new ArgumentNullException("sentence");
            }

            // Drop empty entries: with a plain Split(' '), repeated, leading or trailing
            // spaces would yield empty strings that get handed to the parser as words.
            string[] words = sentence.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

            // This sample shows parsing a list of correctly tokenized words
            var rawWords = Sentence.toCoreLabelList(words);
            var tree = lp.apply(rawWords);
            return tree;
        }

        /// <summary>
        /// Renders the tree with <c>Tree.pennString()</c>.
        /// </summary>
        /// <param name="tree">Tree to render.</param>
        /// <returns>The string returned by <c>pennString()</c>.</returns>
        private static string GetConstituencyTree(Tree tree)
        {
            return tree.pennString();
        }

        /// <summary>
        /// Extracts the typed dependencies of the tree (via
        /// <c>typedDependenciesCCprocessed()</c>) and renders them one per line.
        /// </summary>
        /// <param name="tree">Tree to extract the dependencies from.</param>
        /// <returns>
        /// Each dependency's string form followed by "\n"; empty when there are none.
        /// </returns>
        private static string GetDependencyTree(Tree tree)
        {
            // Extract dependencies from lexical tree
            var tlp = new PennTreebankLanguagePack();
            var gsf = tlp.grammaticalStructureFactory();
            var gs = gsf.newGrammaticalStructure(tree);
            var tdl = gs.typedDependenciesCCprocessed();

            // StringBuilder avoids re-allocating the whole string on every iteration.
            var dependencyTree = new StringBuilder();
            int count = tdl.size();
            for (int i = 0; i < count; ++i)
            {
                dependencyTree.Append(tdl.get(i)).Append("\n");
            }

            return dependencyTree.ToString();
        }

        /// <summary>
        /// Entry point: parses a fixed sample sentence and writes both renderings to the console.
        /// </summary>
        static void Main()
        {
            var lp = LoadLexicalizedParser();
            string sentence = "This is an easy sentence.";
            Tree tree = GetLexicalTree(lp, sentence);
            string constituencyTree = GetConstituencyTree(tree);
            string dependencyTree = GetDependencyTree(tree);
            Console.WriteLine("Constituency Tree\n" + constituencyTree);
            Console.WriteLine("Dependency Tree\n" + dependencyTree);

            // Alternative (disabled): print collapsed dependencies straight from the parsed tree.
            //var tp = new TreePrint("penn,typedDependenciesCollapsed");
            //tp.printTree(tree);
        }
    }
}
在此代码中,我将成分句法树(constituency tree)和依存树(dependency tree)作为字符串获取。但是我希望直接通过 'Tree' 类型本身来使用它们,也就是说,我想访问并操作变量 'tree' 的各个节点。有什么办法可以做到吗?或者我是否必须创建自己的树数据结构,并通过处理字符串('constituencyTree' & 'dependencyTree')来获取各个节点?
[我目前正在做的一个小项目需要这个。]
是的,有许多现成的数据结构可用于成分句法树和依存树。对于成分句法树,您需要使用 Tree 数据结构,
它具有许多有用的内置函数,可用于遍历树、获取所有终端节点等。
对于依存树,您既可以使用 TypedDependency 对象的列表(其中每个 TypedDependency
表示一个支配词(governor)与一个从属词(dependent)之间的关系),也可以使用 SemanticGraph。
要将您在示例中命名为 tdl 的 TypedDependency 列表转换为 SemanticGraph,
只需将该列表传递给构造函数:
SemanticGraph sg = new SemanticGraph(tdl);