如何在 Java 中使用 GATE Annie 获取命名实体提取?
How to get Named Entity Extraction using GATE Annie in Java?
我是 GATE ANNIE 的新手。我尝试了 GATE GUI 界面 并获得了在其上执行任务的经验。我想知道如何在 Java 中实现 命名实体提取 ?
我做了一些调研,但没有找到任何关于命名实体提取的教程。
是否有在 Java 中使用 GATE ANNIE 进行命名实体提取的示例代码?
import gate.*;
import gate.creole.ANNIEConstants;
import gate.util.persistence.PersistenceManager;
import java.io.File;
import java.util.*;
public class AnnieNerExample {
public static void main(String[] args) throws Exception {
Gate.setGateHome(new File("C:\Program Files\GATE_Developer_8.1"));
Gate.init();
LanguageAnalyser controller = (LanguageAnalyser) PersistenceManager
.loadObjectFromFile(new File(new File(Gate.getPluginsHome(),
ANNIEConstants.PLUGIN_DIR), ANNIEConstants.DEFAULT_FILE));
Corpus corpus = Factory.newCorpus("corpus");
Document document = Factory.newDocument(
"Michael Jordan is a professor at the University of California, Berkeley.");
corpus.add(document); controller.setCorpus(corpus);
controller.execute();
document.getAnnotations().get(new HashSet<>(Arrays.asList("Person", "Organization", "Location")))
.forEach(a -> System.err.format("%s - \"%s\" [%d to %d]\n",
a.getType(), Utils.stringFor(document, a),
a.getStartNode().getOffset(), a.getEndNode().getOffset()));
//Don't forget to release GATE resources
Factory.deleteResource(document); Factory.deleteResource(corpus); Factory.deleteResource(controller);
}
}
输出:
Person - "Michael Jordan" [0 to 14]
Organization - "University of California" [37 to 61]
Location - "Berkeley" [63 to 71]
JAR 包
两种可能:
- 手动
Quick Start with GATE Embedded:
add $GATE_HOME/bin/gate.jar
and the JAR files in $GATE_HOME/lib
to the Java CLASSPATH ($GATE_HOME
is the GATE root directory)
Maven
<!-- Maven coordinates for the gate-core artifact, version 8.4. -->
<dependency>
<groupId>uk.ac.gate</groupId>
<artifactId>gate-core</artifactId>
<version>8.4</version>
</dependency>
若要在 Java 代码中进行开发和使用,最好使用 GATE Embedded。
pom.xml 文件
<!-- Dependencies declared for the Main.java example: gate-core plus the ANNIE plugin artifact. -->
<dependencies>
<!-- gate-core artifact, version 8.6.1. -->
<dependency>
<groupId>uk.ac.gate</groupId>
<artifactId>gate-core</artifactId>
<version>8.6.1</version>
</dependency>
<!-- https://mvnrepository.com/artifact/uk.ac.gate.plugins/annie -->
<!-- ANNIE plugin artifact, version 8.6. With "provided" scope Maven makes it available
     at compile time but does not package it or pass it on transitively.
     NOTE(review): presumably the plugin is resolved at runtime by the application
     itself; confirm this scope is intended. -->
<dependency>
<groupId>uk.ac.gate.plugins</groupId>
<artifactId>annie</artifactId>
<version>8.6</version>
<scope>provided</scope>
</dependency>
</dependencies>
Main.java 文件在 src
import gate.*;
import gate.creole.ANNIEConstants;
import gate.creole.Plugin;
import gate.creole.ResourceReference;
import gate.util.persistence.PersistenceManager;
import java.net.URL;
import java.util.Arrays;
import java.util.HashSet;
/**
 * GATE Embedded example that obtains the ANNIE plugin through its Maven
 * coordinates, runs the default ANNIE application over one sentence, and
 * prints each Person, Organization and Location annotation.
 */
public class Main {
    /**
     * Loads ANNIE, processes a fixed sample sentence and writes one line per
     * matching annotation to standard output.
     *
     * @param args unused
     * @throws Exception if GATE initialisation, plugin registration, pipeline
     *                   loading or execution fails
     */
    public static void main(String[] args) throws Exception {
        Gate.init();

        // The plugin must be registered before its saved application can be loaded.
        Plugin annie = new Plugin.Maven("uk.ac.gate.plugins", "annie", "8.6");
        Gate.getCreoleRegister().registerPlugin(annie);

        String applicationPath = "resources/" + ANNIEConstants.DEFAULT_FILE;
        URL applicationUrl = new ResourceReference(annie, applicationPath).toURL();
        LanguageAnalyser pipeline =
                (LanguageAnalyser) PersistenceManager.loadObjectFromUrl(applicationUrl);

        Corpus docs = Factory.newCorpus("corpus");
        Document text = Factory.newDocument(
                "Michael Jordan is a professor at the University of California, Berkeley.");
        docs.add(text);
        pipeline.setCorpus(docs);
        pipeline.execute();

        // Restrict the default annotation set to the entity types of interest.
        HashSet<String> entityTypes =
                new HashSet<>(Arrays.asList("Person", "Organization", "Location"));
        for (Annotation entity : text.getAnnotations().get(entityTypes)) {
            System.out.println("type : " + entity.getType()
                    + "\t data : " + Utils.stringFor(text, entity)
                    + "\t start : " + entity.getStartNode().getOffset()
                    + "\t end : " + entity.getEndNode().getOffset());
        }

        // Release the GATE resources created above.
        Factory.deleteResource(text);
        Factory.deleteResource(docs);
        Factory.deleteResource(pipeline);
    }
}
示例代码: https://github.com/hsali/gate-developer-annie-plugin-example
我是 GATE ANNIE 的新手。我尝试了 GATE GUI 界面 并获得了在其上执行任务的经验。我想知道如何在 Java 中实现 命名实体提取 ?
我做了一些调研,但没有找到任何关于命名实体提取的教程。
是否有在 Java 中使用 GATE ANNIE 进行命名实体提取的示例代码?
import gate.*;
import gate.creole.ANNIEConstants;
import gate.util.persistence.PersistenceManager;
import java.io.File;
import java.util.*;
public class AnnieNerExample {
public static void main(String[] args) throws Exception {
Gate.setGateHome(new File("C:\Program Files\GATE_Developer_8.1"));
Gate.init();
LanguageAnalyser controller = (LanguageAnalyser) PersistenceManager
.loadObjectFromFile(new File(new File(Gate.getPluginsHome(),
ANNIEConstants.PLUGIN_DIR), ANNIEConstants.DEFAULT_FILE));
Corpus corpus = Factory.newCorpus("corpus");
Document document = Factory.newDocument(
"Michael Jordan is a professor at the University of California, Berkeley.");
corpus.add(document); controller.setCorpus(corpus);
controller.execute();
document.getAnnotations().get(new HashSet<>(Arrays.asList("Person", "Organization", "Location")))
.forEach(a -> System.err.format("%s - \"%s\" [%d to %d]\n",
a.getType(), Utils.stringFor(document, a),
a.getStartNode().getOffset(), a.getEndNode().getOffset()));
//Don't forget to release GATE resources
Factory.deleteResource(document); Factory.deleteResource(corpus); Factory.deleteResource(controller);
}
}
输出:
Person - "Michael Jordan" [0 to 14]
Organization - "University of California" [37 to 61]
Location - "Berkeley" [63 to 71]
JAR 包
两种可能:
- 手动
Quick Start with GATE Embedded:
add
$GATE_HOME/bin/gate.jar
and the JAR files in $GATE_HOME/lib
to the Java CLASSPATH ($GATE_HOME
is the GATE root directory)
Maven
<!-- Maven coordinates for the gate-core artifact, version 8.4. -->
<dependency> <groupId>uk.ac.gate</groupId> <artifactId>gate-core</artifactId> <version>8.4</version> </dependency>
若要在 Java 代码中进行开发和使用,最好使用 GATE Embedded。
pom.xml 文件
<!-- Dependencies declared for the Main.java example: gate-core plus the ANNIE plugin artifact. -->
<dependencies>
<!-- gate-core artifact, version 8.6.1. -->
<dependency>
<groupId>uk.ac.gate</groupId>
<artifactId>gate-core</artifactId>
<version>8.6.1</version>
</dependency>
<!-- https://mvnrepository.com/artifact/uk.ac.gate.plugins/annie -->
<!-- ANNIE plugin artifact, version 8.6. With "provided" scope Maven makes it available
     at compile time but does not package it or pass it on transitively.
     NOTE(review): presumably the plugin is resolved at runtime by the application
     itself; confirm this scope is intended. -->
<dependency>
<groupId>uk.ac.gate.plugins</groupId>
<artifactId>annie</artifactId>
<version>8.6</version>
<scope>provided</scope>
</dependency>
</dependencies>
Main.java 文件在 src
import gate.*;
import gate.creole.ANNIEConstants;
import gate.creole.Plugin;
import gate.creole.ResourceReference;
import gate.util.persistence.PersistenceManager;
import java.net.URL;
import java.util.Arrays;
import java.util.HashSet;
/**
 * GATE Embedded example that obtains the ANNIE plugin through its Maven
 * coordinates, runs the default ANNIE application over one sentence, and
 * prints each Person, Organization and Location annotation.
 */
public class Main {
    /**
     * Loads ANNIE, processes a fixed sample sentence and writes one line per
     * matching annotation to standard output.
     *
     * @param args unused
     * @throws Exception if GATE initialisation, plugin registration, pipeline
     *                   loading or execution fails
     */
    public static void main(String[] args) throws Exception {
        Gate.init();

        // The plugin must be registered before its saved application can be loaded.
        Plugin annie = new Plugin.Maven("uk.ac.gate.plugins", "annie", "8.6");
        Gate.getCreoleRegister().registerPlugin(annie);

        String applicationPath = "resources/" + ANNIEConstants.DEFAULT_FILE;
        URL applicationUrl = new ResourceReference(annie, applicationPath).toURL();
        LanguageAnalyser pipeline =
                (LanguageAnalyser) PersistenceManager.loadObjectFromUrl(applicationUrl);

        Corpus docs = Factory.newCorpus("corpus");
        Document text = Factory.newDocument(
                "Michael Jordan is a professor at the University of California, Berkeley.");
        docs.add(text);
        pipeline.setCorpus(docs);
        pipeline.execute();

        // Restrict the default annotation set to the entity types of interest.
        HashSet<String> entityTypes =
                new HashSet<>(Arrays.asList("Person", "Organization", "Location"));
        for (Annotation entity : text.getAnnotations().get(entityTypes)) {
            System.out.println("type : " + entity.getType()
                    + "\t data : " + Utils.stringFor(text, entity)
                    + "\t start : " + entity.getStartNode().getOffset()
                    + "\t end : " + entity.getEndNode().getOffset());
        }

        // Release the GATE resources created above.
        Factory.deleteResource(text);
        Factory.deleteResource(docs);
        Factory.deleteResource(pipeline);
    }
}
示例代码: https://github.com/hsali/gate-developer-annie-plugin-example