如何在 Java 中使用 GATE ANNIE 进行命名实体提取?

How to get Named Entity Extraction using GATE Annie in Java?

我是 GATE ANNIE 的新手。我试用过 GATE 的 GUI 界面,并积累了一些在其中执行任务的经验。我想知道如何在 Java 中实现命名实体提取。

我做了一些调研,但找不到任何关于命名实体提取的教程。

是否有示例代码可以演示如何在 Java 中使用 GATE ANNIE 进行命名实体提取?

import gate.*;
import gate.creole.ANNIEConstants;
import gate.util.persistence.PersistenceManager;
import java.io.File;
import java.util.*;

public class AnnieNerExample {

    public static void main(String[] args) throws Exception {
        Gate.setGateHome(new File("C:\Program Files\GATE_Developer_8.1"));
        Gate.init();

        LanguageAnalyser controller = (LanguageAnalyser) PersistenceManager
                .loadObjectFromFile(new File(new File(Gate.getPluginsHome(),
                        ANNIEConstants.PLUGIN_DIR), ANNIEConstants.DEFAULT_FILE));

        Corpus corpus = Factory.newCorpus("corpus");
        Document document = Factory.newDocument(
                "Michael Jordan is a professor at the University of California, Berkeley.");
        corpus.add(document); controller.setCorpus(corpus); 
        controller.execute();

        document.getAnnotations().get(new HashSet<>(Arrays.asList("Person", "Organization", "Location")))
            .forEach(a -> System.err.format("%s - \"%s\" [%d to %d]\n", 
                    a.getType(), Utils.stringFor(document, a),
                    a.getStartNode().getOffset(), a.getEndNode().getOffset()));

        //Don't forget to release GATE resources 
        Factory.deleteResource(document); Factory.deleteResource(corpus); Factory.deleteResource(controller);
    }
}

输出:

Person - "Michael Jordan" [0 to 14]
Organization - "University of California" [37 to 61]
Location - "Berkeley" [63 to 71]

JAR 包

两种可能:

  1. 手动

Quick Start with GATE Embedded:

add $GATE_HOME/bin/gate.jar and the JAR files in $GATE_HOME/lib to the Java CLASSPATH ($GATE_HOME is the GATE root directory)

  2. Maven

    <dependency>
        <groupId>uk.ac.gate</groupId>
        <artifactId>gate-core</artifactId>
        <version>8.4</version>
    </dependency>
    

若要在 Java 代码中进行开发和调用,最好使用 GATE Embedded。

pom.xml 文件

<dependencies>
    <dependency>
        <groupId>uk.ac.gate</groupId>
        <artifactId>gate-core</artifactId>
        <version>8.6.1</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/uk.ac.gate.plugins/annie -->
    <dependency>
        <groupId>uk.ac.gate.plugins</groupId>
        <artifactId>annie</artifactId>
        <version>8.6</version>
        <scope>provided</scope>
    </dependency>

</dependencies>

src 目录下的 Main.java 文件

import gate.*;
import gate.creole.ANNIEConstants;
import gate.creole.Plugin;
import gate.creole.ResourceReference;
import gate.util.persistence.PersistenceManager;

import java.net.URL;
import java.util.Arrays;
import java.util.HashSet;

/**
 * GATE Embedded example that obtains the ANNIE plugin through its Maven
 * coordinates instead of a local GATE installation, runs the pipeline on a
 * sample sentence and prints the named entities found.
 */
public class Main {

    /**
     * Registers the ANNIE plugin, loads its saved application, executes it
     * over a one-document corpus and prints the Person, Organization and
     * Location annotations to standard output.
     *
     * @param args unused
     * @throws Exception if GATE initialisation, plugin registration or execution fails
     */
    public static void main(String[] args) throws Exception {
        Gate.init();

        // Register the plugin, then locate the saved application inside it.
        Plugin annie = new Plugin.Maven("uk.ac.gate.plugins", "annie", "8.6");
        Gate.getCreoleRegister().registerPlugin(annie);
        ResourceReference pipelineRef =
                new ResourceReference(annie, "resources/" + ANNIEConstants.DEFAULT_FILE);
        URL pipelineUrl = pipelineRef.toURL();
        LanguageAnalyser pipeline =
                (LanguageAnalyser) PersistenceManager.loadObjectFromUrl(pipelineUrl);

        Corpus docs = Factory.newCorpus("corpus");
        Document doc = Factory.newDocument("Michael Jordan is a professor at the University of California, Berkeley.");
        docs.add(doc);
        pipeline.setCorpus(docs);
        pipeline.execute();

        HashSet<String> wantedTypes =
                new HashSet<String>(Arrays.asList("Person", "Organization", "Location"));
        for (Annotation entity : doc.getAnnotations().get(wantedTypes)) {
            printEntity(doc, entity);
        }

        // Release GATE resources once the results have been printed.
        Factory.deleteResource(doc);
        Factory.deleteResource(docs);
        Factory.deleteResource(pipeline);
    }

    /** Prints one annotation as a tab-separated line: type, covered text, start and end offsets. */
    private static void printEntity(Document doc, Annotation entity) {
        String line = "type : " + entity.getType()
                + "\t data : " + Utils.stringFor(doc, entity)
                + "\t start : " + entity.getStartNode().getOffset()
                + "\t end : " + entity.getEndNode().getOffset();
        System.out.println(line);
    }
}

示例代码:https://github.com/hsali/gate-developer-annie-plugin-example