使用 dkpro 语义相似度与 uby
Use dkpro semantic similarity with uby
我想使用 DKPro Similarity(https://dkpro.github.io/dkpro-similarity/)计算字符串之间的相似度。目前的实现可以正常工作,代码如下:
import org.dkpro.similarity.algorithms.api.SimilarityException;
import org.dkpro.similarity.algorithms.api.TextSimilarityMeasure;
import org.dkpro.similarity.algorithms.lsr.LexSemResourceComparator;
import org.dkpro.similarity.algorithms.lsr.gloss.GlossOverlapComparator;
import org.dkpro.similarity.algorithms.lsr.path.JiangConrathComparator;
import org.dkpro.similarity.algorithms.lsr.path.LeacockChodorowComparator;
import org.dkpro.similarity.algorithms.lsr.path.LinComparator;
import org.dkpro.similarity.algorithms.lsr.path.ResnikComparator;
import org.dkpro.similarity.algorithms.lsr.path.WuPalmerComparator;
import de.tudarmstadt.ukp.dkpro.lexsemresource.LexicalSemanticResource;
import de.tudarmstadt.ukp.dkpro.lexsemresource.core.ResourceFactory;
import de.tudarmstadt.ukp.dkpro.lexsemresource.exception.LexicalSemanticResourceException;
import de.tudarmstadt.ukp.dkpro.lexsemresource.exception.ResourceLoaderException;
import learninggoals.analysis.controller.settingtypes.SimilarityAlgorithm;
public class SemResourceComparator implements WordsComparator{
private LexicalSemanticResource resource;
private LexSemResourceComparator comparator;
//en lang
/**
 * Loads the lexical-semantic resource and builds the comparator that matches the
 * requested algorithm.
 *
 * <p>Fails fast instead of leaving {@code resource} or {@code comparator} unset: a
 * half-initialised instance would only surface later as an unexplained
 * {@code NullPointerException} inside {@code compareWords}.
 *
 * @param resourcetype resource name handed to {@code ResourceFactory.get}
 * @param algorithm similarity algorithm that selects the comparator implementation
 * @param lang language handed to {@code ResourceFactory.get}
 * @throws NullPointerException if {@code algorithm} is {@code null}
 * @throws IllegalStateException if the resource cannot be loaded or the comparator
 *         cannot be created from it
 * @throws IllegalArgumentException if {@code algorithm} has no comparator mapping here
 */
public SemResourceComparator(String resourcetype, SimilarityAlgorithm algorithm, String lang) {
    if (algorithm == null) {
        // Same exception type the switch statement would raise, but with a message.
        throw new NullPointerException("algorithm must not be null");
    }

    try {
        resource = ResourceFactory.getInstance().get(resourcetype, lang);
    } catch (ResourceLoaderException e) {
        throw new IllegalStateException(
                "Could not load lexical-semantic resource '" + resourcetype
                        + "' for language '" + lang + "'", e);
    }
    if (resource == null) {
        // Guard against a factory that reports "not found" by returning null.
        throw new IllegalStateException(
                "No lexical-semantic resource '" + resourcetype
                        + "' available for language '" + lang + "'");
    }

    try {
        // ESA is vector-based (per the original note) and has no comparator mapping here.
        switch (algorithm) {
            case GLOSSOVERLAP:
                comparator = new GlossOverlapComparator(resource, false);
                break;
            case JIANG_CONRATH:
                comparator = new JiangConrathComparator(resource, resource.getRoot());
                break;
            case LEACOCK_CHODOROW:
                comparator = new LeacockChodorowComparator(resource);
                break;
            case LIN:
                comparator = new LinComparator(resource, resource.getRoot());
                break;
            case RESNIK:
                comparator = new ResnikComparator(resource, resource.getRoot());
                break;
            case WUPALMER:
                comparator = new WuPalmerComparator(resource, resource.getRoot());
                break;
            default:
                throw new IllegalArgumentException(
                        "Unsupported similarity algorithm: " + algorithm);
        }
    } catch (LexicalSemanticResourceException e) {
        throw new IllegalStateException(
                "Could not create " + algorithm + " comparator for resource '"
                        + resourcetype + "'", e);
    }
}
/**
 * Computes the similarity of two words using the configured comparator.
 *
 * <p>Both words are looked up as entities in the resource and passed to the
 * comparator. If the lookup or the comparison throws a {@code SimilarityException}
 * or a {@code LexicalSemanticResourceException}, the stack trace is printed and
 * {@code 0} is returned.
 *
 * @param w1 first word
 * @param w2 second word
 * @return the comparator's similarity value, or {@code 0} on failure
 */
@Override
public double compareWords(String w1, String w2) {
    // Fallback value reported whenever one of the handled exceptions occurs.
    double similarity = 0;
    try {
        similarity = comparator.getSimilarity(resource.getEntity(w1), resource.getEntity(w2));
    } catch (SimilarityException comparisonFailure) {
        comparisonFailure.printStackTrace();
    } catch (LexicalSemanticResourceException lookupFailure) {
        lookupFailure.printStackTrace();
    }
    return similarity;
}
我是这样使用这个类的:
double intermscore = comparator.compareWords(word1, word2);
我使用LexicalSemanticResource作为比较资源,它可以是wordnet、wikipedia、germanet等。现在我注意到我需要的所有资源都在uby中(https://www.ukp.tu-darmstadt.de/data/lexical-resources/uby/, https://github.com/dkpro/dkpro-uby/blob/master/de.tudarmstadt.ukp.uby.lmf.api-asl/src/main/java/de/tudarmstadt/ukp/lmf/api/Uby.java)。
我的问题是:我能否用 UBY 中的资源来替换这里的资源,这样就不必在每次需要新资源时都重新引入它?也就是说,我想使用 UBY 资源来代替 ResourceFactory.getInstance().get("wordnet"),例如写成 new Uby().getLexicalResource("wordnet")。然而,UBY 的词汇资源与我目前用于语义比较的 LexicalSemanticResource 类型不同。换言之:我不想使用例如 LexicalSemanticResource 形式的 WordNet,而是想把 UBY 中的 WordNet 用于比较器。有办法做到吗?
目前没有办法做到这一点。UBY 资源和 LSR 资源互不兼容。
已经有切换的计划,但相关问题(issue)已经存在一段时间了:
https://github.com/dkpro/dkpro-similarity/issues/39
我想使用 DKPro Similarity(https://dkpro.github.io/dkpro-similarity/)计算字符串之间的相似度。目前的实现可以正常工作,代码如下:
import org.dkpro.similarity.algorithms.api.SimilarityException;
import org.dkpro.similarity.algorithms.api.TextSimilarityMeasure;
import org.dkpro.similarity.algorithms.lsr.LexSemResourceComparator;
import org.dkpro.similarity.algorithms.lsr.gloss.GlossOverlapComparator;
import org.dkpro.similarity.algorithms.lsr.path.JiangConrathComparator;
import org.dkpro.similarity.algorithms.lsr.path.LeacockChodorowComparator;
import org.dkpro.similarity.algorithms.lsr.path.LinComparator;
import org.dkpro.similarity.algorithms.lsr.path.ResnikComparator;
import org.dkpro.similarity.algorithms.lsr.path.WuPalmerComparator;
import de.tudarmstadt.ukp.dkpro.lexsemresource.LexicalSemanticResource;
import de.tudarmstadt.ukp.dkpro.lexsemresource.core.ResourceFactory;
import de.tudarmstadt.ukp.dkpro.lexsemresource.exception.LexicalSemanticResourceException;
import de.tudarmstadt.ukp.dkpro.lexsemresource.exception.ResourceLoaderException;
import learninggoals.analysis.controller.settingtypes.SimilarityAlgorithm;
public class SemResourceComparator implements WordsComparator{
private LexicalSemanticResource resource;
private LexSemResourceComparator comparator;
//en lang
/**
 * Loads the lexical-semantic resource and builds the comparator that matches the
 * requested algorithm.
 *
 * <p>Fails fast instead of leaving {@code resource} or {@code comparator} unset: a
 * half-initialised instance would only surface later as an unexplained
 * {@code NullPointerException} inside {@code compareWords}.
 *
 * @param resourcetype resource name handed to {@code ResourceFactory.get}
 * @param algorithm similarity algorithm that selects the comparator implementation
 * @param lang language handed to {@code ResourceFactory.get}
 * @throws NullPointerException if {@code algorithm} is {@code null}
 * @throws IllegalStateException if the resource cannot be loaded or the comparator
 *         cannot be created from it
 * @throws IllegalArgumentException if {@code algorithm} has no comparator mapping here
 */
public SemResourceComparator(String resourcetype, SimilarityAlgorithm algorithm, String lang) {
    if (algorithm == null) {
        // Same exception type the switch statement would raise, but with a message.
        throw new NullPointerException("algorithm must not be null");
    }

    try {
        resource = ResourceFactory.getInstance().get(resourcetype, lang);
    } catch (ResourceLoaderException e) {
        throw new IllegalStateException(
                "Could not load lexical-semantic resource '" + resourcetype
                        + "' for language '" + lang + "'", e);
    }
    if (resource == null) {
        // Guard against a factory that reports "not found" by returning null.
        throw new IllegalStateException(
                "No lexical-semantic resource '" + resourcetype
                        + "' available for language '" + lang + "'");
    }

    try {
        // ESA is vector-based (per the original note) and has no comparator mapping here.
        switch (algorithm) {
            case GLOSSOVERLAP:
                comparator = new GlossOverlapComparator(resource, false);
                break;
            case JIANG_CONRATH:
                comparator = new JiangConrathComparator(resource, resource.getRoot());
                break;
            case LEACOCK_CHODOROW:
                comparator = new LeacockChodorowComparator(resource);
                break;
            case LIN:
                comparator = new LinComparator(resource, resource.getRoot());
                break;
            case RESNIK:
                comparator = new ResnikComparator(resource, resource.getRoot());
                break;
            case WUPALMER:
                comparator = new WuPalmerComparator(resource, resource.getRoot());
                break;
            default:
                throw new IllegalArgumentException(
                        "Unsupported similarity algorithm: " + algorithm);
        }
    } catch (LexicalSemanticResourceException e) {
        throw new IllegalStateException(
                "Could not create " + algorithm + " comparator for resource '"
                        + resourcetype + "'", e);
    }
}
/**
 * Computes the similarity of two words using the configured comparator.
 *
 * <p>Both words are looked up as entities in the resource and passed to the
 * comparator. If the lookup or the comparison throws a {@code SimilarityException}
 * or a {@code LexicalSemanticResourceException}, the stack trace is printed and
 * {@code 0} is returned.
 *
 * @param w1 first word
 * @param w2 second word
 * @return the comparator's similarity value, or {@code 0} on failure
 */
@Override
public double compareWords(String w1, String w2) {
    // Fallback value reported whenever one of the handled exceptions occurs.
    double similarity = 0;
    try {
        similarity = comparator.getSimilarity(resource.getEntity(w1), resource.getEntity(w2));
    } catch (SimilarityException comparisonFailure) {
        comparisonFailure.printStackTrace();
    } catch (LexicalSemanticResourceException lookupFailure) {
        lookupFailure.printStackTrace();
    }
    return similarity;
}
我是这样使用这个类的:
double intermscore = comparator.compareWords(word1, word2);
我使用LexicalSemanticResource作为比较资源,它可以是wordnet、wikipedia、germanet等。现在我注意到我需要的所有资源都在uby中(https://www.ukp.tu-darmstadt.de/data/lexical-resources/uby/, https://github.com/dkpro/dkpro-uby/blob/master/de.tudarmstadt.ukp.uby.lmf.api-asl/src/main/java/de/tudarmstadt/ukp/lmf/api/Uby.java)。
我的问题是:我能否用 UBY 中的资源来替换这里的资源,这样就不必在每次需要新资源时都重新引入它?也就是说,我想使用 UBY 资源来代替 ResourceFactory.getInstance().get("wordnet"),例如写成 new Uby().getLexicalResource("wordnet")。然而,UBY 的词汇资源与我目前用于语义比较的 LexicalSemanticResource 类型不同。换言之:我不想使用例如 LexicalSemanticResource 形式的 WordNet,而是想把 UBY 中的 WordNet 用于比较器。有办法做到吗?
目前没有办法做到这一点。UBY 资源和 LSR 资源互不兼容。
已经有切换的计划,但相关问题(issue)已经存在一段时间了: https://github.com/dkpro/dkpro-similarity/issues/39