Luke + Lucene 5.4.1
Luke + lucene 5.4.1
我正在用自定义分析器和 Lucene 5.4.1 构建索引文件,并尝试用 Luke 在索引文件中查找数据。我想把自定义分析器添加到 Luke 中,但在分析器(Analyzer)选项卡中找不到它。
我使用以下命令将我的分析器添加到 Luke 的类路径中:`java -cp "pivot-luke-with-deps.jar;CatalogSearchAnalyzer.jar" org.getopt.luke.Luke`
我的分析器代码`
public class CatalogSearchAnalyzer extends Analyzer {
private Version matchVersion;
private String termValue;
private boolean retMultiple;
public static final String[] STOP_WORDS = { "a", "and", "are", "as", "at",
"be", "but", "by", "for", "if", "in", "into", "is", "it", "no",
"not", "of", "on", "or", "such", "t", "that", "the", "their",
"then", "there", "these", "they", "this", "to", "was", "will",
"with" };
private CharArraySet stopTable;
private int maxTokenLength;
public CatalogSearchAnalyzer(Version matchVersion) {
this.stopTable = StopFilter.makeStopSet(STOP_WORDS);
this.maxTokenLength = 255;
this.matchVersion = matchVersion;
}
public CatalogSearchAnalyzer() {
this(STOP_WORDS);
}
public void setTermValue(String termValue) {
}
public void setRetMultiple(boolean retMultiple) {
}
public CatalogSearchAnalyzer(String[] stopWords) {
this.stopTable = StopFilter.makeStopSet(STOP_WORDS);
this.maxTokenLength = 255;
StopFilter.makeStopSet(stopWords);
}
private TokenStream getStemmingFilter(TokenStream result) {
PorterStemFilter temp = new PorterStemFilter(result);
temp.setRetMultiple(this.retMultiple);
return temp;
}
protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
StandardTokenizer st = new StandardTokenizer();
st.setMaxTokenLength(this.maxTokenLength);
Tokenizer tk = st;
TokenStream ts = new StandardFilter(tk);
ts = new LowerCaseFilter(ts);
ts = new StopFilter(ts, this.stopTable);
ts = getStemmingFilter(ts);
return new Analyzer.TokenStreamComponents(tk, ts) {
protected void setReader(Reader reader) {
int m = CatalogSearchAnalyzer.this.maxTokenLength;
if (this.source instanceof CmgtTokenizer) {
((CmgtTokenizer) this.source).setMaxTokenLength(m);
}
super.setReader(reader);
}
};
}
}
`
将我的 jar 包添加到 Luke 时,我没有遇到任何异常。
提前感谢您对此的调查。
如问题下的评论部分所述,解决方案是使用原始的基于 thinlet 的 luke 版本,而不是基于 pivot 的 luke。基于 pivot 的 luke 正在开发中,尚不支持所有功能(尽管鼓励进行更多测试!)
Thinlet luke on master(当前):https://github.com/DmitryKey/luke
我正在用自定义分析器和 Lucene 5.4.1 构建索引文件,并尝试用 Luke 在索引文件中查找数据。我想把自定义分析器添加到 Luke 中,但在分析器(Analyzer)选项卡中找不到它。
我使用以下命令将我的分析器添加到 Luke 的类路径中:`java -cp "pivot-luke-with-deps.jar;CatalogSearchAnalyzer.jar" org.getopt.luke.Luke`
我的分析器代码`
public class CatalogSearchAnalyzer extends Analyzer {
private Version matchVersion;
private String termValue;
private boolean retMultiple;
public static final String[] STOP_WORDS = { "a", "and", "are", "as", "at",
"be", "but", "by", "for", "if", "in", "into", "is", "it", "no",
"not", "of", "on", "or", "such", "t", "that", "the", "their",
"then", "there", "these", "they", "this", "to", "was", "will",
"with" };
private CharArraySet stopTable;
private int maxTokenLength;
public CatalogSearchAnalyzer(Version matchVersion) {
this.stopTable = StopFilter.makeStopSet(STOP_WORDS);
this.maxTokenLength = 255;
this.matchVersion = matchVersion;
}
public CatalogSearchAnalyzer() {
this(STOP_WORDS);
}
public void setTermValue(String termValue) {
}
public void setRetMultiple(boolean retMultiple) {
}
public CatalogSearchAnalyzer(String[] stopWords) {
this.stopTable = StopFilter.makeStopSet(STOP_WORDS);
this.maxTokenLength = 255;
StopFilter.makeStopSet(stopWords);
}
private TokenStream getStemmingFilter(TokenStream result) {
PorterStemFilter temp = new PorterStemFilter(result);
temp.setRetMultiple(this.retMultiple);
return temp;
}
protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
StandardTokenizer st = new StandardTokenizer();
st.setMaxTokenLength(this.maxTokenLength);
Tokenizer tk = st;
TokenStream ts = new StandardFilter(tk);
ts = new LowerCaseFilter(ts);
ts = new StopFilter(ts, this.stopTable);
ts = getStemmingFilter(ts);
return new Analyzer.TokenStreamComponents(tk, ts) {
protected void setReader(Reader reader) {
int m = CatalogSearchAnalyzer.this.maxTokenLength;
if (this.source instanceof CmgtTokenizer) {
((CmgtTokenizer) this.source).setMaxTokenLength(m);
}
super.setReader(reader);
}
};
}
}
` 将我的 jar 包添加到 Luke 时,我没有遇到任何异常。
提前感谢您对此的调查。
如问题下的评论部分所述,解决方案是使用原始的基于 thinlet 的 luke 版本,而不是基于 pivot 的 luke。基于 pivot 的 luke 正在开发中,尚不支持所有功能(尽管鼓励进行更多测试!)
Thinlet luke on master(当前):https://github.com/DmitryKey/luke