如何使用 Lucene/Hibernate Search 搜索包含关键字 "With" 的姓名?
How to search name having keyword "With" using Lucene/Hibernate?
要搜索的人的姓名是 "Suleman Kumar With",其中 With 是姓氏。
搜索对其他所有姓名都有效,但对这个英文关键字("With")无效。
以下是我创建 Lucene 索引的方式:
/**
 * Returns the last name, mapped to the {@code LASTNAME} column.
 *
 * <p>The value is indexed under two fields: the default field, for which no
 * analyzer is named on this annotation, and {@code LastName_Sort}, which uses
 * the analyzer definition called {@code sortAnalyzer}.
 *
 * @return the current value of the {@code lastName} field
 */
@Fields({
    @Field(index = Index.YES, store = Store.NO),
    @Field(
        name = "LastName_Sort",
        index = Index.YES,
        analyzer = @Analyzer(definition = "sortAnalyzer"))
})
@Column(name = "LASTNAME", length = 50)
public String getLastName() {
    return this.lastName;
}
sortAnalyzer 具有以下配置:
// Analyzer definition "sortAnalyzer": the keyword tokenizer is followed by a
// lower-case filter and two regex replacement filters.
@AnalyzerDef(name = "sortAnalyzer",
    tokenizer = @TokenizerDef(factory = KeywordTokenizerFactory.class),
    filters = {
        @TokenFilterDef(factory = LowerCaseFilterFactory.class),
        // First replacement: every match of the pattern becomes a single space.
        // Backslashes are doubled because "\." / "\(" / "\)" are not legal escape
        // sequences in a Java string literal; the regex engine still receives \. \( \).
        // NOTE(review): as written this is a group matching the literal sequence
        // '-&.,() rather than any one of those characters; a character class may
        // have been intended - confirm before changing.
        @TokenFilterDef(factory = PatternReplaceFilterFactory.class, params = {
            @Parameter(name = "pattern", value = "('-&\\.,\\(\\))"),
            @Parameter(name = "replacement", value = " "),
            @Parameter(name = "replace", value = "all")
        }),
        // Second replacement: delete every character that is not an ASCII digit,
        // a Unicode letter (\p{L}) or a space. "\p" must also be written "\\p" in Java.
        @TokenFilterDef(factory = PatternReplaceFilterFactory.class, params = {
            @Parameter(name = "pattern", value = "([^0-9\\p{L} ])"),
            @Parameter(name = "replacement", value = ""),
            @Parameter(name = "replace", value = "all")
        })
    }
)
我按姓氏和主键 ID 进行搜索,但出现了词元(token)不匹配的错误。
我通过自己实现的自定义分析器(Custom Analyzer)解决了这个问题。
/**
 * Analyzer built on {@link StopwordAnalyzerBase} that hands a {@code null}
 * stop-word set to the base class (Lucene 3.6 API).
 *
 * <p>NOTE(review): presumably the base class treats {@code null} as "no stop
 * words", so words such as "with" are kept - confirm against the Lucene docs.
 */
public class IgnoreStopWordsAnalyzer extends StopwordAnalyzerBase {

    /** Lucene compatibility version passed to every component created here. */
    private static final Version MATCH_VERSION = Version.LUCENE_36;

    /** Creates the analyzer, passing {@code null} as the stop-word set. */
    public IgnoreStopWordsAnalyzer() {
        super(MATCH_VERSION, null);
    }

    /**
     * Builds the analysis chain: standard tokenizer, then standard filter,
     * lower-case filter and a stop filter using the inherited stop-word set.
     *
     * @param fieldName name of the field being analyzed (not used here)
     * @param reader    source of the text to tokenize
     * @return the tokenizer together with the end of the filter chain
     */
    @Override
    protected ReusableAnalyzerBase.TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
        final StandardTokenizer tokenizer = new StandardTokenizer(MATCH_VERSION, reader);
        final TokenStream chain = new StopFilter(
                MATCH_VERSION,
                new LowerCaseFilter(MATCH_VERSION, new StandardFilter(MATCH_VERSION, tokenizer)),
                stopwords);
        return new ReusableAnalyzerBase.TokenStreamComponents(tokenizer, chain);
    }
}
在字段上指定此分析器后,停用词过滤将不再生效,"With" 这类词也会被正常索引。
对于 Hibernate Search 5,可以使用如下自定义分析器:
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
/**
 * Analyzer built on {@link StopwordAnalyzerBase} that hands a {@code null}
 * stop-word set to the base class (version-less Lucene 5 style constructors).
 *
 * <p>NOTE(review): presumably the base class treats {@code null} as "no stop
 * words", so words such as "with" are kept - confirm against the Lucene docs.
 */
public class IgnoreStopWordsAnalyzer extends StopwordAnalyzerBase {

    /** Creates the analyzer, passing {@code null} as the stop-word set. */
    public IgnoreStopWordsAnalyzer() {
        super(null);
    }

    /**
     * Builds the analysis chain: standard tokenizer, then standard filter,
     * lower-case filter and a stop filter using the inherited stop-word set.
     *
     * @param fieldName name of the field being analyzed (not used here)
     * @return the tokenizer together with the end of the filter chain
     */
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
        final Tokenizer tokenizer = new StandardTokenizer();
        final TokenStream chain =
                new StopFilter(new LowerCaseFilter(new StandardFilter(tokenizer)), stopwords);
        return new TokenStreamComponents(tokenizer, chain);
    }
}
要搜索的人的姓名是 "Suleman Kumar With",其中 With 是姓氏。搜索对其他所有姓名都有效,但对这个英文关键字("With")无效。
以下是我创建 Lucene 索引的方式:
/**
 * Returns the last name, mapped to the {@code LASTNAME} column.
 *
 * <p>The value is indexed under two fields: the default field, for which no
 * analyzer is named on this annotation, and {@code LastName_Sort}, which uses
 * the analyzer definition called {@code sortAnalyzer}.
 *
 * @return the current value of the {@code lastName} field
 */
@Fields({
    @Field(index = Index.YES, store = Store.NO),
    @Field(
        name = "LastName_Sort",
        index = Index.YES,
        analyzer = @Analyzer(definition = "sortAnalyzer"))
})
@Column(name = "LASTNAME", length = 50)
public String getLastName() {
    return this.lastName;
}
sortAnalyzer 具有以下配置:
// Analyzer definition "sortAnalyzer": the keyword tokenizer is followed by a
// lower-case filter and two regex replacement filters.
@AnalyzerDef(name = "sortAnalyzer",
    tokenizer = @TokenizerDef(factory = KeywordTokenizerFactory.class),
    filters = {
        @TokenFilterDef(factory = LowerCaseFilterFactory.class),
        // First replacement: every match of the pattern becomes a single space.
        // Backslashes are doubled because "\." / "\(" / "\)" are not legal escape
        // sequences in a Java string literal; the regex engine still receives \. \( \).
        // NOTE(review): as written this is a group matching the literal sequence
        // '-&.,() rather than any one of those characters; a character class may
        // have been intended - confirm before changing.
        @TokenFilterDef(factory = PatternReplaceFilterFactory.class, params = {
            @Parameter(name = "pattern", value = "('-&\\.,\\(\\))"),
            @Parameter(name = "replacement", value = " "),
            @Parameter(name = "replace", value = "all")
        }),
        // Second replacement: delete every character that is not an ASCII digit,
        // a Unicode letter (\p{L}) or a space. "\p" must also be written "\\p" in Java.
        @TokenFilterDef(factory = PatternReplaceFilterFactory.class, params = {
            @Parameter(name = "pattern", value = "([^0-9\\p{L} ])"),
            @Parameter(name = "replacement", value = ""),
            @Parameter(name = "replace", value = "all")
        })
    }
)
我按姓氏和主键 ID 进行搜索,但出现了词元(token)不匹配的错误。
我通过自己实现的自定义分析器(Custom Analyzer)解决了这个问题。
/**
 * Analyzer built on {@link StopwordAnalyzerBase} that hands a {@code null}
 * stop-word set to the base class (Lucene 3.6 API).
 *
 * <p>NOTE(review): presumably the base class treats {@code null} as "no stop
 * words", so words such as "with" are kept - confirm against the Lucene docs.
 */
public class IgnoreStopWordsAnalyzer extends StopwordAnalyzerBase {

    /** Lucene compatibility version passed to every component created here. */
    private static final Version MATCH_VERSION = Version.LUCENE_36;

    /** Creates the analyzer, passing {@code null} as the stop-word set. */
    public IgnoreStopWordsAnalyzer() {
        super(MATCH_VERSION, null);
    }

    /**
     * Builds the analysis chain: standard tokenizer, then standard filter,
     * lower-case filter and a stop filter using the inherited stop-word set.
     *
     * @param fieldName name of the field being analyzed (not used here)
     * @param reader    source of the text to tokenize
     * @return the tokenizer together with the end of the filter chain
     */
    @Override
    protected ReusableAnalyzerBase.TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
        final StandardTokenizer tokenizer = new StandardTokenizer(MATCH_VERSION, reader);
        final TokenStream chain = new StopFilter(
                MATCH_VERSION,
                new LowerCaseFilter(MATCH_VERSION, new StandardFilter(MATCH_VERSION, tokenizer)),
                stopwords);
        return new ReusableAnalyzerBase.TokenStreamComponents(tokenizer, chain);
    }
}
在字段上指定此分析器后,停用词过滤将不再生效,"With" 这类词也会被正常索引。
对于 Hibernate Search 5,可以使用如下自定义分析器:
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
/**
 * Analyzer built on {@link StopwordAnalyzerBase} that hands a {@code null}
 * stop-word set to the base class (version-less Lucene 5 style constructors).
 *
 * <p>NOTE(review): presumably the base class treats {@code null} as "no stop
 * words", so words such as "with" are kept - confirm against the Lucene docs.
 */
public class IgnoreStopWordsAnalyzer extends StopwordAnalyzerBase {

    /** Creates the analyzer, passing {@code null} as the stop-word set. */
    public IgnoreStopWordsAnalyzer() {
        super(null);
    }

    /**
     * Builds the analysis chain: standard tokenizer, then standard filter,
     * lower-case filter and a stop filter using the inherited stop-word set.
     *
     * @param fieldName name of the field being analyzed (not used here)
     * @return the tokenizer together with the end of the filter chain
     */
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
        final Tokenizer tokenizer = new StandardTokenizer();
        final TokenStream chain =
                new StopFilter(new LowerCaseFilter(new StandardFilter(tokenizer)), stopwords);
        return new TokenStreamComponents(tokenizer, chain);
    }
}