Lucene.net 否定子句不起作用
Lucene.net Negation clause is not working
我刚接触 Lucene.net,连最基本的功能(即 NOT 排除)都没能实现。
我的需求是搜索匹配 "road?construction" 但不包含 "Works" 一词的记录。
例如
Main Road Construction Works(主要道路建设工程)-- 无效(包含单词 Works)
Road Construction And Maintenance Services(道路建设和维护服务)-- 有效(不包含单词 Works)
请参考我下面的代码。
// Question's reproduction: a parsed phrase expression combined with a MUST_NOT term clause.
// `parser` is declared outside this snippet.
string searchQuery = "\"road?construction\"*";
BooleanQuery query2 = new BooleanQuery();
Query query;
try
{
query = parser.Parse(searchQuery.Trim());
}
catch (ParseException)
{
// Fallback: if the raw expression is rejected, parse its escaped form instead.
query = parser.Parse(QueryParser.Escape(searchQuery.Trim()));
}
query2.Add(query,Occur.SHOULD);
// NOTE(review): this TermQuery is built directly from the literal "Works" on field "Name";
// no analyzer is applied to it here. If the index was written with a lower-casing analyzer
// and/or a field called "name", this clause presumably never matches — confirm against the
// indexing code (not shown).
query2.Add(new BooleanClause(new TermQuery (new Term("Name", "Works")), Occur.MUST_NOT));
这样仍然会把上述两条记录都包含在搜索结果中。我想把无效的记录(即第一条)排除掉。
这是后台生成的结果查询。
请提出解决方法。
在此先致谢。
不确定您为什么要在短语中加入通配符。如果您要查找的是 "road construction",那么只写这个短语就够了。如果您希望允许一定的变化,那么带 slop 的短语查询("slop phrase")可能正是您需要的,例如 "road construction"~2。其中的数字 n 表示最多允许 n 次“操作”,一次操作相当于允许短语的词之间多出一个额外的单词。
下面是一组测试,其中既有您原来的示例(TestExpr2、TestExpr3),也有几个可以正常工作的变体(TestExpr1 和 TestQuery)。
希望对您有所帮助
/// <summary>
/// Exercises a "road construction" search that must exclude documents containing "works",
/// using both query-parser expressions and a programmatically built <see cref="BooleanQuery"/>.
/// Every test runs against the same two-document in-memory index.
/// </summary>
[TestClass]
public class UnitTest7
{
    // Field that holds the indexed and stored text of every sample document.
    private const string FieldName = "name";

    // Sample record containing "Works"; the searches are expected to exclude it.
    private const string ExcludedRecord = "Main Road Construction Works";

    // Sample record without "Works"; the searches are expected to return only this one.
    private const string ExpectedRecord = "Road Construction And Maintenance Services";

    /// <summary>Plain phrase plus the prohibit operator.</summary>
    [TestMethod]
    public void TestExpr1()
    {
        TestExpr("\"road construction\" -works");
    }

    /// <summary>Phrase containing wildcard characters, as in the original question.</summary>
    /// <remarks>
    /// NOTE(review): the accompanying write-up lists this among the asker's original examples
    /// rather than the working variants, so it is presumably expected to fail — confirm.
    /// </remarks>
    [TestMethod]
    public void TestExpr2()
    {
        TestExpr("\"road?construction\"* -works");
    }

    /// <summary>Same expression as <see cref="TestExpr2"/>, escaped before parsing.</summary>
    /// <remarks>
    /// NOTE(review): escaping also escapes the quotes and the '-' operator, so this is
    /// presumably expected to fail as well — confirm.
    /// </remarks>
    [TestMethod]
    public void TestExpr3()
    {
        TestExpr(QueryParser.Escape("\"road?construction\"* -works"));
    }

    /// <summary>
    /// Indexes the two sample records, runs <paramref name="expr"/> through the query parser
    /// and asserts that only the record without "Works" is returned.
    /// </summary>
    /// <param name="expr">Query-parser expression to evaluate against the sample index.</param>
    private void TestExpr(string expr)
    {
        // using blocks release the writer, reader and searcher even when an assertion throws.
        using (var writer = CreateIndex())
        {
            AddSampleRecords(writer);

            using (var reader = writer.GetReader())
            using (var searcher = new IndexSearcher(reader))
            {
                AssertOnlyExpectedRecord(Search(searcher, expr));
            }
        }
    }

    /// <summary>
    /// Builds the equivalent query by hand: the phrase "road construction" is required and
    /// the term "works" is prohibited.
    /// </summary>
    [TestMethod]
    public void TestQuery()
    {
        using (var writer = CreateIndex())
        {
            AddSampleRecords(writer);

            using (var reader = writer.GetReader())
            using (var searcher = new IndexSearcher(reader))
            {
                var phrase = new PhraseQuery();
                phrase.Add(new Term(FieldName, "road"));
                phrase.Add(new Term(FieldName, "construction"));

                var query = new BooleanQuery();
                query.Add(phrase, Occur.MUST);
                // Lower-case term text is used here because it is not run through an analyzer.
                query.Add(new TermQuery(new Term(FieldName, "works")), Occur.MUST_NOT);

                AssertOnlyExpectedRecord(Search(searcher, query));
            }
        }
    }

    /// <summary>Parses <paramref name="expr"/> against the "name" field and runs the search.</summary>
    /// <param name="searcher">Searcher over the sample index.</param>
    /// <param name="expr">Query-parser expression.</param>
    /// <returns>Stored "name" values of the matching documents.</returns>
    private List<string> Search(IndexSearcher searcher, string expr)
    {
        var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
        var queryParser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, FieldName, analyzer);
        return Search(searcher, queryParser.Parse(expr));
    }

    /// <summary>Runs <paramref name="query"/> and collects at most ten hits.</summary>
    /// <param name="searcher">Searcher over the sample index.</param>
    /// <param name="query">Query to execute.</param>
    /// <returns>Stored "name" values of the matching documents.</returns>
    private List<string> Search(IndexSearcher searcher, Query query)
    {
        var collector = TopScoreDocCollector.Create(10, true);
        searcher.Search(query, collector);

        var result = new List<string>();
        foreach (var hit in collector.TopDocs().ScoreDocs)
        {
            var doc = searcher.Doc(hit.Doc);
            result.Add(doc.GetField(FieldName).StringValue);
        }

        return result;
    }

    /// <summary>Creates a writer over a fresh in-memory directory.</summary>
    /// <returns>A new <see cref="IndexWriter"/>; the caller is responsible for disposing it.</returns>
    private IndexWriter CreateIndex()
    {
        var directory = new RAMDirectory();
        var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
        return new IndexWriter(directory, analyzer, new IndexWriter.MaxFieldLength(1000));
    }

    /// <summary>Adds one document whose "name" field is analyzed and stored.</summary>
    /// <param name="writer">Writer that receives the document.</param>
    /// <param name="text">Text to index and store.</param>
    private void Add(IndexWriter writer, string text)
    {
        var document = new Document();
        document.Add(new Field(FieldName, text, Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(document);
    }

    // Writes both sample records and flushes them before a reader is opened.
    private void AddSampleRecords(IndexWriter writer)
    {
        Add(writer, ExcludedRecord);
        Add(writer, ExpectedRecord);
        writer.Flush(true, true, true);
    }

    // Shared expectation: exactly one hit, and it is the record without "Works".
    private static void AssertOnlyExpectedRecord(List<string> result)
    {
        Assert.AreEqual(1, result.Count);
        Assert.IsTrue(result.Contains(ExpectedRecord));
    }
}
我刚接触 Lucene.net,连最基本的功能(即 NOT 排除)都没能实现。
我的需求是搜索匹配 "road?construction" 但不包含 "Works" 一词的记录。
例如
Main Road Construction Works(主要道路建设工程)-- 无效(包含单词 Works)
Road Construction And Maintenance Services(道路建设和维护服务)-- 有效(不包含单词 Works)
请参考我下面的代码。
// Question's reproduction: a parsed phrase expression combined with a MUST_NOT term clause.
// `parser` is declared outside this snippet.
string searchQuery = "\"road?construction\"*";
BooleanQuery query2 = new BooleanQuery();
Query query;
try
{
query = parser.Parse(searchQuery.Trim());
}
catch (ParseException)
{
// Fallback: if the raw expression is rejected, parse its escaped form instead.
query = parser.Parse(QueryParser.Escape(searchQuery.Trim()));
}
query2.Add(query,Occur.SHOULD);
// NOTE(review): this TermQuery is built directly from the literal "Works" on field "Name";
// no analyzer is applied to it here. If the index was written with a lower-casing analyzer
// and/or a field called "name", this clause presumably never matches — confirm against the
// indexing code (not shown).
query2.Add(new BooleanClause(new TermQuery (new Term("Name", "Works")), Occur.MUST_NOT));
这样仍然会把上述两条记录都包含在搜索结果中。我想把无效的记录(即第一条)排除掉。
这是后台生成的结果查询。
请提出解决方法。
在此先致谢。
不确定您为什么要在短语中加入通配符。如果您要查找的是 "road construction",那么只写这个短语就够了。如果您希望允许一定的变化,那么带 slop 的短语查询("slop phrase")可能正是您需要的,例如 "road construction"~2。其中的数字 n 表示最多允许 n 次“操作”,一次操作相当于允许短语的词之间多出一个额外的单词。
下面是一组测试,其中既有您原来的示例(TestExpr2、TestExpr3),也有几个可以正常工作的变体(TestExpr1 和 TestQuery)。
希望对您有所帮助
/// <summary>
/// Exercises a "road construction" search that must exclude documents containing "works",
/// using both query-parser expressions and a programmatically built <see cref="BooleanQuery"/>.
/// Every test runs against the same two-document in-memory index.
/// </summary>
[TestClass]
public class UnitTest7
{
    // Field that holds the indexed and stored text of every sample document.
    private const string FieldName = "name";

    // Sample record containing "Works"; the searches are expected to exclude it.
    private const string ExcludedRecord = "Main Road Construction Works";

    // Sample record without "Works"; the searches are expected to return only this one.
    private const string ExpectedRecord = "Road Construction And Maintenance Services";

    /// <summary>Plain phrase plus the prohibit operator.</summary>
    [TestMethod]
    public void TestExpr1()
    {
        TestExpr("\"road construction\" -works");
    }

    /// <summary>Phrase containing wildcard characters, as in the original question.</summary>
    /// <remarks>
    /// NOTE(review): the accompanying write-up lists this among the asker's original examples
    /// rather than the working variants, so it is presumably expected to fail — confirm.
    /// </remarks>
    [TestMethod]
    public void TestExpr2()
    {
        TestExpr("\"road?construction\"* -works");
    }

    /// <summary>Same expression as <see cref="TestExpr2"/>, escaped before parsing.</summary>
    /// <remarks>
    /// NOTE(review): escaping also escapes the quotes and the '-' operator, so this is
    /// presumably expected to fail as well — confirm.
    /// </remarks>
    [TestMethod]
    public void TestExpr3()
    {
        TestExpr(QueryParser.Escape("\"road?construction\"* -works"));
    }

    /// <summary>
    /// Indexes the two sample records, runs <paramref name="expr"/> through the query parser
    /// and asserts that only the record without "Works" is returned.
    /// </summary>
    /// <param name="expr">Query-parser expression to evaluate against the sample index.</param>
    private void TestExpr(string expr)
    {
        // using blocks release the writer, reader and searcher even when an assertion throws.
        using (var writer = CreateIndex())
        {
            AddSampleRecords(writer);

            using (var reader = writer.GetReader())
            using (var searcher = new IndexSearcher(reader))
            {
                AssertOnlyExpectedRecord(Search(searcher, expr));
            }
        }
    }

    /// <summary>
    /// Builds the equivalent query by hand: the phrase "road construction" is required and
    /// the term "works" is prohibited.
    /// </summary>
    [TestMethod]
    public void TestQuery()
    {
        using (var writer = CreateIndex())
        {
            AddSampleRecords(writer);

            using (var reader = writer.GetReader())
            using (var searcher = new IndexSearcher(reader))
            {
                var phrase = new PhraseQuery();
                phrase.Add(new Term(FieldName, "road"));
                phrase.Add(new Term(FieldName, "construction"));

                var query = new BooleanQuery();
                query.Add(phrase, Occur.MUST);
                // Lower-case term text is used here because it is not run through an analyzer.
                query.Add(new TermQuery(new Term(FieldName, "works")), Occur.MUST_NOT);

                AssertOnlyExpectedRecord(Search(searcher, query));
            }
        }
    }

    /// <summary>Parses <paramref name="expr"/> against the "name" field and runs the search.</summary>
    /// <param name="searcher">Searcher over the sample index.</param>
    /// <param name="expr">Query-parser expression.</param>
    /// <returns>Stored "name" values of the matching documents.</returns>
    private List<string> Search(IndexSearcher searcher, string expr)
    {
        var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
        var queryParser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, FieldName, analyzer);
        return Search(searcher, queryParser.Parse(expr));
    }

    /// <summary>Runs <paramref name="query"/> and collects at most ten hits.</summary>
    /// <param name="searcher">Searcher over the sample index.</param>
    /// <param name="query">Query to execute.</param>
    /// <returns>Stored "name" values of the matching documents.</returns>
    private List<string> Search(IndexSearcher searcher, Query query)
    {
        var collector = TopScoreDocCollector.Create(10, true);
        searcher.Search(query, collector);

        var result = new List<string>();
        foreach (var hit in collector.TopDocs().ScoreDocs)
        {
            var doc = searcher.Doc(hit.Doc);
            result.Add(doc.GetField(FieldName).StringValue);
        }

        return result;
    }

    /// <summary>Creates a writer over a fresh in-memory directory.</summary>
    /// <returns>A new <see cref="IndexWriter"/>; the caller is responsible for disposing it.</returns>
    private IndexWriter CreateIndex()
    {
        var directory = new RAMDirectory();
        var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
        return new IndexWriter(directory, analyzer, new IndexWriter.MaxFieldLength(1000));
    }

    /// <summary>Adds one document whose "name" field is analyzed and stored.</summary>
    /// <param name="writer">Writer that receives the document.</param>
    /// <param name="text">Text to index and store.</param>
    private void Add(IndexWriter writer, string text)
    {
        var document = new Document();
        document.Add(new Field(FieldName, text, Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(document);
    }

    // Writes both sample records and flushes them before a reader is opened.
    private void AddSampleRecords(IndexWriter writer)
    {
        Add(writer, ExcludedRecord);
        Add(writer, ExpectedRecord);
        writer.Flush(true, true, true);
    }

    // Shared expectation: exactly one hit, and it is the record without "Works".
    private static void AssertOnlyExpectedRecord(List<string> result)
    {
        Assert.AreEqual(1, result.Count);
        Assert.IsTrue(result.Contains(ExpectedRecord));
    }
}