某些查询总是引发 Lucene.NET 运行时异常
Consistent Lucene.NET runtime exception on certain queries
我正在使用 Lucene.NET 做一个概念验证,准备把全文搜索集成到我们的应用程序中。有些查询工作正常,有些查询返回的结果似乎与 Luke 工具返回的结果不一致。更严重的是,下面这个查询:
(Description:tasty) (Gtin:00018389732061)
总是产生这个异常:
An unhandled exception of type 'System.IndexOutOfRangeException'
occurred in Lucene.Net.dll at Lucene.Net.Search.TermScorer.Score()
in d:\Lucene.Net\FullRepo\trunk\src\core\Search\TermScorer.cs:line 136
at
Lucene.Net.Search.BooleanScorer.BooleanScorerCollector.Collect(Int32
doc) in
d:\Lucene.Net\FullRepo\trunk\src\core\Search\BooleanScorer.cs:line 88
at Lucene.Net.Search.TermScorer.Score(Collector c, Int32 end, Int32
firstDocID) in
d:\Lucene.Net\FullRepo\trunk\src\core\Search\TermScorer.cs:line 80
at Lucene.Net.Search.BooleanScorer.Score(Collector collector, Int32
max, Int32 firstDocID) in
d:\Lucene.Net\FullRepo\trunk\src\core\Search\BooleanScorer.cs:line 323
at Lucene.Net.Search.BooleanScorer.Score(Collector collector) in
d:\Lucene.Net\FullRepo\trunk\src\core\Search\BooleanScorer.cs:line 389
at Lucene.Net.Search.IndexSearcher.Search(Weight weight, Filter
filter, Collector collector) in
d:\Lucene.Net\FullRepo\trunk\src\core\Search\IndexSearcher.cs:line 228
at Lucene.Net.Search.IndexSearcher.Search(Weight weight, Filter
filter, Int32 nDocs) in
d:\Lucene.Net\FullRepo\trunk\src\core\Search\IndexSearcher.cs:line 188
at Lucene.Net.Search.Searcher.Search(Query query, Filter filter, Int32
n) in d:\Lucene.Net\FullRepo\trunk\src\core\Search\Searcher.cs:line
108 at Lucene.Net.Search.Searcher.Search(Query query, Int32 n) in
d:\Lucene.Net\FullRepo\trunk\src\core\Search\Searcher.cs:line 118
at...
如果我改为使用此查询:
(Description:tasty) (Gtin:000)
我就能得到结果。是什么导致了上面第一个查询的异常?供参考,以下是相关的代码片段:
/// <summary>
/// Runs <paramref name="query"/> against the index stored at
/// <paramref name="indexLocation"/> and returns the matching documents,
/// ordered by descending score.
/// </summary>
/// <param name="query">The already-built query to execute as-is.</param>
/// <param name="indexLocation">Directory that holds the index files.</param>
/// <param name="defaultField">
/// Not used by this implementation; kept so existing callers and overrides still compile.
/// </param>
/// <returns>
/// The documents for the hits returned by the searcher (requested with
/// <c>Constants.MaxHits</c> as the limit); an empty list when the search yields no result object.
/// </returns>
protected virtual IList<Document> GetDocuments(BooleanQuery query, DirectoryInfo indexLocation, string defaultField)
{
    var docs = new List<Document>();

    // The original also built a QueryParser here that was never used; its
    // StandardAnalyzer was never disposed, so it has been removed.
    using (var dir = new MMapDirectory(indexLocation))
    using (var searcher = new IndexSearcher(dir))
    {
        TopDocs result = searcher.Search(query, Constants.MaxHits);
        if (result == null)
        {
            return docs;
        }

        // Documents must be loaded while the searcher is still open.
        foreach (var scoredoc in result.ScoreDocs.OrderByDescending(d => d.Score))
        {
            docs.Add(searcher.Doc(scoredoc.Doc));
        }

        return docs;
    }
}
根据下面评论中的建议做了修改,这是我当前的代码(原样贴出,未作删改),但仍然不起作用。
/// <summary>
/// Re-parses the textual form of <paramref name="query"/> with a
/// <c>StandardAnalyzer</c>-backed <c>QueryParser</c>, runs it against the index at
/// <paramref name="indexLocation"/>, and returns the matching documents
/// ordered by descending score.
/// </summary>
/// <param name="query">Query whose <c>ToString()</c> output is parsed and executed.</param>
/// <param name="indexLocation">Directory that holds the index files.</param>
/// <param name="defaultField">Field the parser applies to terms that carry no explicit field.</param>
/// <returns>The documents for the hits gathered by the collector (created with <c>Constants.MaxHits</c>).</returns>
protected virtual IList<Document> GetDocuments(BooleanQuery query, DirectoryInfo indexLocation, string defaultField)
{
    var docs = new List<Document>();

    using (var dir = new MMapDirectory(indexLocation))
    using (var searcher = new IndexSearcher(dir))
    using (var analyzer = new StandardAnalyzer(Constants.LuceneVersion))
    {
        var queryParser = new QueryParser(Constants.LuceneVersion, defaultField, analyzer);
        var collector = TopScoreDocCollector.Create(Constants.MaxHits, true);

        // NOTE(review): the query is round-tripped through ToString()/Parse, so every
        // term is re-analyzed by StandardAnalyzer. That may not reproduce the original
        // query for fields that were indexed without analysis - confirm this is intended.
        var parsed = queryParser.Parse(query.ToString());
        searcher.Search(parsed, collector);

        // Documents must be loaded while the searcher is still open.
        var matches = collector.TopDocs().ScoreDocs;
        foreach (var scoredoc in matches.OrderByDescending(d => d.Score))
        {
            docs.Add(searcher.Doc(scoredoc.Doc));
        }

        return docs;
    }
}
严格来说这不算答案,只是想说明“在我的机器上可以正常运行”。之所以作为答案发布,是为了分享这段能够正常运行的单元测试代码。希望提问者可以指出他们的版本与此有何不同。
此版本假定 "Gtin" 字段是字符串字段,并且未经分析(因为它看起来是一个编码)。
/// <summary>
/// Self-contained reproduction: indexes a single document in memory and checks
/// that several query spellings over the Description and Gtin fields each find it.
/// </summary>
[TestClass]
public class UnitTest4
{
    /// <summary>
    /// Indexes one document and runs four equivalent queries against it.
    /// </summary>
    [TestMethod]
    public void TestLucene()
    {
        // using-blocks guarantee the writer, reader and searcher are released
        // even when one of the assertions below throws.
        using (var writer = CreateIndex())
        {
            Add(writer, "tasty", "00018389732061");
            writer.Flush(true, true, true);

            // The reader is disposed explicitly rather than relying on the searcher,
            // because the searcher was handed an already-open reader.
            using (var reader = writer.GetReader())
            using (var searcher = new IndexSearcher(reader))
            {
                Test(searcher, "(Description:tasty) (Gtin:00018389732061)");
                Test(searcher, "Description:tasty Gtin:00018389732061");
                Test(searcher, "+Description:tasty +Gtin:00018389732061");
                Test(searcher, "+Description:tasty +Gtin:000*");
            }
        }
    }

    /// <summary>
    /// Runs <paramref name="query"/> and asserts that it matches exactly one
    /// document whose Gtin is "00018389732061".
    /// </summary>
    /// <param name="searcher">Searcher over the test index.</param>
    /// <param name="query">Query text in QueryParser syntax.</param>
    private void Test(IndexSearcher searcher, string query)
    {
        var result = Search(searcher, query);
        Console.WriteLine(string.Join(", ", result));
        Assert.AreEqual(1, result.Count);
        Assert.AreEqual("00018389732061", result[0]);
    }

    /// <summary>
    /// Parses <paramref name="expr"/> with "Description" as the default field and
    /// returns the stored Gtin value of every hit.
    /// </summary>
    /// <param name="searcher">Searcher to run the parsed query against.</param>
    /// <param name="expr">Query text in QueryParser syntax.</param>
    /// <returns>Gtin values of the hits, in the order the collector reports them.</returns>
    private List<string> Search(IndexSearcher searcher, string expr)
    {
        using (var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30))
        {
            var queryParser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "Description", analyzer);
            var collector = TopScoreDocCollector.Create(1000, true);
            var query = queryParser.Parse(expr);
            searcher.Search(query, collector);

            var result = new List<string>();
            foreach (var item in collector.TopDocs().ScoreDocs)
            {
                var doc = searcher.Doc(item.Doc);
                result.Add(doc.GetField("Gtin").StringValue);
            }

            return result;
        }
    }

    /// <summary>
    /// Creates an <c>IndexWriter</c> over a new <c>RAMDirectory</c>, using a
    /// LUCENE_30 <c>StandardAnalyzer</c> and a maximum field length of 1000.
    /// </summary>
    /// <returns>The writer; the caller is responsible for disposing it.</returns>
    private IndexWriter CreateIndex()
    {
        var directory = new RAMDirectory();
        var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
        return new IndexWriter(directory, analyzer, new IndexWriter.MaxFieldLength(1000));
    }

    /// <summary>
    /// Adds one document with an analyzed "Description" field and a
    /// non-analyzed "Gtin" field; both values are stored.
    /// </summary>
    /// <param name="writer">Writer that receives the document.</param>
    /// <param name="desc">Value for the Description field.</param>
    /// <param name="id">Value for the Gtin field, indexed verbatim.</param>
    private void Add(IndexWriter writer, string desc, string id)
    {
        var document = new Document();
        document.Add(new Field("Description", desc, Field.Store.YES, Field.Index.ANALYZED));
        document.Add(new Field("Gtin", id, Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.AddDocument(document);
    }
}
我正在使用 Lucene.NET 做一个概念验证,准备把全文搜索集成到我们的应用程序中。有些查询工作正常,有些查询返回的结果似乎与 Luke 工具返回的结果不一致。更严重的是,下面这个查询:
(Description:tasty) (Gtin:00018389732061)
总是产生这个异常:
An unhandled exception of type 'System.IndexOutOfRangeException' occurred in Lucene.Net.dll at Lucene.Net.Search.TermScorer.Score() in d:\Lucene.Net\FullRepo\trunk\src\core\Search\TermScorer.cs:line 136 at Lucene.Net.Search.BooleanScorer.BooleanScorerCollector.Collect(Int32 doc) in d:\Lucene.Net\FullRepo\trunk\src\core\Search\BooleanScorer.cs:line 88 at Lucene.Net.Search.TermScorer.Score(Collector c, Int32 end, Int32 firstDocID) in d:\Lucene.Net\FullRepo\trunk\src\core\Search\TermScorer.cs:line 80
at Lucene.Net.Search.BooleanScorer.Score(Collector collector, Int32 max, Int32 firstDocID) in d:\Lucene.Net\FullRepo\trunk\src\core\Search\BooleanScorer.cs:line 323 at Lucene.Net.Search.BooleanScorer.Score(Collector collector) in d:\Lucene.Net\FullRepo\trunk\src\core\Search\BooleanScorer.cs:line 389 at Lucene.Net.Search.IndexSearcher.Search(Weight weight, Filter filter, Collector collector) in d:\Lucene.Net\FullRepo\trunk\src\core\Search\IndexSearcher.cs:line 228 at Lucene.Net.Search.IndexSearcher.Search(Weight weight, Filter filter, Int32 nDocs) in d:\Lucene.Net\FullRepo\trunk\src\core\Search\IndexSearcher.cs:line 188 at Lucene.Net.Search.Searcher.Search(Query query, Filter filter, Int32 n) in d:\Lucene.Net\FullRepo\trunk\src\core\Search\Searcher.cs:line 108 at Lucene.Net.Search.Searcher.Search(Query query, Int32 n) in d:\Lucene.Net\FullRepo\trunk\src\core\Search\Searcher.cs:line 118
at...
如果我改为使用此查询:
(Description:tasty) (Gtin:000)
我就能得到结果。是什么导致了上面第一个查询的异常?供参考,以下是相关的代码片段:
/// <summary>
/// Runs <paramref name="query"/> against the index stored at
/// <paramref name="indexLocation"/> and returns the matching documents,
/// ordered by descending score.
/// </summary>
/// <param name="query">The already-built query to execute as-is.</param>
/// <param name="indexLocation">Directory that holds the index files.</param>
/// <param name="defaultField">
/// Not used by this implementation; kept so existing callers and overrides still compile.
/// </param>
/// <returns>
/// The documents for the hits returned by the searcher (requested with
/// <c>Constants.MaxHits</c> as the limit); an empty list when the search yields no result object.
/// </returns>
protected virtual IList<Document> GetDocuments(BooleanQuery query, DirectoryInfo indexLocation, string defaultField)
{
    var docs = new List<Document>();

    // The original also built a QueryParser here that was never used; its
    // StandardAnalyzer was never disposed, so it has been removed.
    using (var dir = new MMapDirectory(indexLocation))
    using (var searcher = new IndexSearcher(dir))
    {
        TopDocs result = searcher.Search(query, Constants.MaxHits);
        if (result == null)
        {
            return docs;
        }

        // Documents must be loaded while the searcher is still open.
        foreach (var scoredoc in result.ScoreDocs.OrderByDescending(d => d.Score))
        {
            docs.Add(searcher.Doc(scoredoc.Doc));
        }

        return docs;
    }
}
根据下面评论中的建议做了修改,这是我当前的代码(原样贴出,未作删改),但仍然不起作用。
/// <summary>
/// Re-parses the textual form of <paramref name="query"/> with a
/// <c>StandardAnalyzer</c>-backed <c>QueryParser</c>, runs it against the index at
/// <paramref name="indexLocation"/>, and returns the matching documents
/// ordered by descending score.
/// </summary>
/// <param name="query">Query whose <c>ToString()</c> output is parsed and executed.</param>
/// <param name="indexLocation">Directory that holds the index files.</param>
/// <param name="defaultField">Field the parser applies to terms that carry no explicit field.</param>
/// <returns>The documents for the hits gathered by the collector (created with <c>Constants.MaxHits</c>).</returns>
protected virtual IList<Document> GetDocuments(BooleanQuery query, DirectoryInfo indexLocation, string defaultField)
{
    var docs = new List<Document>();

    using (var dir = new MMapDirectory(indexLocation))
    using (var searcher = new IndexSearcher(dir))
    using (var analyzer = new StandardAnalyzer(Constants.LuceneVersion))
    {
        var queryParser = new QueryParser(Constants.LuceneVersion, defaultField, analyzer);
        var collector = TopScoreDocCollector.Create(Constants.MaxHits, true);

        // NOTE(review): the query is round-tripped through ToString()/Parse, so every
        // term is re-analyzed by StandardAnalyzer. That may not reproduce the original
        // query for fields that were indexed without analysis - confirm this is intended.
        var parsed = queryParser.Parse(query.ToString());
        searcher.Search(parsed, collector);

        // Documents must be loaded while the searcher is still open.
        var matches = collector.TopDocs().ScoreDocs;
        foreach (var scoredoc in matches.OrderByDescending(d => d.Score))
        {
            docs.Add(searcher.Doc(scoredoc.Doc));
        }

        return docs;
    }
}
严格来说这不算答案,只是想说明“在我的机器上可以正常运行”。之所以作为答案发布,是为了分享这段能够正常运行的单元测试代码。希望提问者可以指出他们的版本与此有何不同。
此版本假定 "Gtin" 字段是字符串字段,并且未经分析(因为它看起来是一个编码)。
/// <summary>
/// Self-contained reproduction: indexes a single document in memory and checks
/// that several query spellings over the Description and Gtin fields each find it.
/// </summary>
[TestClass]
public class UnitTest4
{
    /// <summary>
    /// Indexes one document and runs four equivalent queries against it.
    /// </summary>
    [TestMethod]
    public void TestLucene()
    {
        // using-blocks guarantee the writer, reader and searcher are released
        // even when one of the assertions below throws.
        using (var writer = CreateIndex())
        {
            Add(writer, "tasty", "00018389732061");
            writer.Flush(true, true, true);

            // The reader is disposed explicitly rather than relying on the searcher,
            // because the searcher was handed an already-open reader.
            using (var reader = writer.GetReader())
            using (var searcher = new IndexSearcher(reader))
            {
                Test(searcher, "(Description:tasty) (Gtin:00018389732061)");
                Test(searcher, "Description:tasty Gtin:00018389732061");
                Test(searcher, "+Description:tasty +Gtin:00018389732061");
                Test(searcher, "+Description:tasty +Gtin:000*");
            }
        }
    }

    /// <summary>
    /// Runs <paramref name="query"/> and asserts that it matches exactly one
    /// document whose Gtin is "00018389732061".
    /// </summary>
    /// <param name="searcher">Searcher over the test index.</param>
    /// <param name="query">Query text in QueryParser syntax.</param>
    private void Test(IndexSearcher searcher, string query)
    {
        var result = Search(searcher, query);
        Console.WriteLine(string.Join(", ", result));
        Assert.AreEqual(1, result.Count);
        Assert.AreEqual("00018389732061", result[0]);
    }

    /// <summary>
    /// Parses <paramref name="expr"/> with "Description" as the default field and
    /// returns the stored Gtin value of every hit.
    /// </summary>
    /// <param name="searcher">Searcher to run the parsed query against.</param>
    /// <param name="expr">Query text in QueryParser syntax.</param>
    /// <returns>Gtin values of the hits, in the order the collector reports them.</returns>
    private List<string> Search(IndexSearcher searcher, string expr)
    {
        using (var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30))
        {
            var queryParser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "Description", analyzer);
            var collector = TopScoreDocCollector.Create(1000, true);
            var query = queryParser.Parse(expr);
            searcher.Search(query, collector);

            var result = new List<string>();
            foreach (var item in collector.TopDocs().ScoreDocs)
            {
                var doc = searcher.Doc(item.Doc);
                result.Add(doc.GetField("Gtin").StringValue);
            }

            return result;
        }
    }

    /// <summary>
    /// Creates an <c>IndexWriter</c> over a new <c>RAMDirectory</c>, using a
    /// LUCENE_30 <c>StandardAnalyzer</c> and a maximum field length of 1000.
    /// </summary>
    /// <returns>The writer; the caller is responsible for disposing it.</returns>
    private IndexWriter CreateIndex()
    {
        var directory = new RAMDirectory();
        var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
        return new IndexWriter(directory, analyzer, new IndexWriter.MaxFieldLength(1000));
    }

    /// <summary>
    /// Adds one document with an analyzed "Description" field and a
    /// non-analyzed "Gtin" field; both values are stored.
    /// </summary>
    /// <param name="writer">Writer that receives the document.</param>
    /// <param name="desc">Value for the Description field.</param>
    /// <param name="id">Value for the Gtin field, indexed verbatim.</param>
    private void Add(IndexWriter writer, string desc, string id)
    {
        var document = new Document();
        document.Add(new Field("Description", desc, Field.Store.YES, Field.Index.ANALYZED));
        document.Add(new Field("Gtin", id, Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.AddDocument(document);
    }
}