Lucene - 将日期排序为 NumericField
Lucene - Sorting Date as NumericField
在尝试对日期时间(长)数字字段进行排序时,我总是遇到 FormatException。
When converting a string to DateTime, parse the string to take the
date before putting each variable into the DateTime object.
添加数字字段:
// Index the creation time (UTC ticks) as a stored, indexed numeric long field.
// The original snippet was missing the closing parenthesis of doc.Add(...).
doc.Add(new NumericField("creationDate", Field.Store.YES, true)
    .SetLongValue(DateTime.UtcNow.Ticks));
添加排序:
// boolean query
// Sort on the "creationDate" field, interpreting its values as longs.
var sortField = new SortField("creationDate", SortField.LONG, true);
var inverseSort = new Sort(sortField);
// Ask the searcher for up to 100 hits ordered by that sort.
var results = searcher.Search(query, null, 100, inverseSort); // exception thrown here
检查索引,我可以验证 'creationDate' 字段正在存储 "long" 值。什么可能导致此异常?
编辑:
查询
// Build a query that requires every incoming property to match:
// one MUST term clause per key / lower-cased value pair.
var query = new BooleanQuery();
foreach (var termQuery in incomingProps.Select(kvp => new TermQuery(new Term(kvp.Key, kvp.Value.ToLowerInvariant()))))
{
    query.Add(new BooleanClause(termQuery, Occur.MUST));
}
return query;
版本:Lucene.Net3.0.3
更新:
此问题再次出现,现在是 INT 值。
我下载了 Lucene.Net 源代码并调试了问题。
问题出在 FieldCache 的某处:它试图将值“`\b\0\0\0”解析为整数,这看起来有点奇怪。
我将这些值添加为数字字段:
// Index the version as a stored, indexed numeric int field.
// NOTE(review): the second argument (int.MaxValue) looks like the precision step — confirm
// against the NumericField constructor and check that it matches what the sort expects.
doc.Add(new NumericField(VersionNum, int.MaxValue, Field.Store.YES,
true).SetIntValue(VersionValue));
当我本应至少得到 1 条命中结果时,却得到了异常。
检查索引后,我看到该字段的词项(term)如下:
字段文本为:
编辑:
我硬编码了一个 int 值并添加了几个段:
// Hard-coded experiment: index the prefix-coded string form of the int 1 as a plain
// stored field that is not analyzed and has norms omitted.
doc.Add(new Field(VersionNum, NumericUtils.IntToPrefixCoded(1), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
这导致将版本字段存储为:
而且,当我尝试排序时,出现解析错误:
// Sort on the version field, interpreting its terms as ints.
// NOTE(review): the trailing 'true' presumably requests reverse order — confirm against SortField.
var sortVersion = new SortField(VersionNum, SortField.INT, true);
每次出现异常时,Lucene 都在试图解析“\b\0\0\0”。
查看以字符串形式存储的前缀编码,我猜 1 会被转换为“ \b\0\0\0\1 ”?
Lucene 可能会在 FieldCache 中留下一些垃圾吗?
这是一个单元测试,它试图捕捉您的要求。测试通过。你能解释一下你的代码有什么不同吗? (发布完整的失败测试将帮助我们了解您在做什么:-))
using System;
using System.Linq;
using System.Collections.Generic;
using Microsoft.VisualStudio.TestTools.UnitTesting;
using Lucene.Net.Search;
using Lucene.Net.Index;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.QueryParsers;
using Lucene.Net.Documents;
using Lucene.Net.Store;
namespace SO_answers
{
    /// <summary>
    /// Self-contained attempt to reproduce the FormatException reported when sorting on a
    /// long-valued NumericField: indexes one document in a RAMDirectory, searches it with a
    /// BooleanQuery and sorts the hits on the "creationDate" field.
    /// </summary>
    [TestClass]
    public class UnitTest1
    {
        /// <summary>Name of the numeric field that holds the creation time in UTC ticks.</summary>
        private const string CreationDateField = "creationDate";

        /// <summary>Maximum number of hits requested from the searcher.</summary>
        private const int MaxHits = 100;

        /// <summary>
        /// Indexes a single document and verifies that a sorted search for it returns exactly one hit.
        /// </summary>
        [TestMethod]
        public void TestShopping()
        {
            var item = new Dictionary<string, string>
            {
                { "field1", "value1" },
                { "field2", "value2" },
                { "field3", "value3" }
            };

            // using blocks guarantee the writer, reader and searcher are released even when
            // the assertion (or the search itself) throws.
            using (var writer = CreateIndex())
            {
                Add(writer, item);
                writer.Flush(true, true, true);

                using (var reader = writer.GetReader())
                using (var searcher = new IndexSearcher(reader))
                {
                    var result = Search(searcher, item);
                    Assert.AreEqual(1, result.Count);
                }
            }
        }

        /// <summary>
        /// Searches for documents in which every key/value pair matches, one MUST term clause per pair.
        /// </summary>
        /// <param name="searcher">Searcher over the index under test.</param>
        /// <param name="values">Field names mapped to the values to match; values are lower-cased before querying.</param>
        /// <returns>The stored "creationDate" string of each hit, in sorted order.</returns>
        private List<string> Search(IndexSearcher searcher, Dictionary<string, string> values)
        {
            var query = new BooleanQuery();
            foreach (var kvp in values)
            {
                var termQuery = new TermQuery(new Term(kvp.Key, kvp.Value.ToLowerInvariant()));
                query.Add(new BooleanClause(termQuery, Occur.MUST));
            }

            return Search(searcher, query);
        }

        /// <summary>
        /// Runs <paramref name="query"/> sorted on the "creationDate" field as a long.
        /// </summary>
        /// <param name="searcher">Searcher over the index under test.</param>
        /// <param name="query">Query to execute.</param>
        /// <returns>The stored "creationDate" string of each hit, in sorted order.</returns>
        private List<string> Search(IndexSearcher searcher, Query query)
        {
            var sort = new Sort(new SortField(CreationDateField, SortField.LONG, true));

            // This is the call that threw FormatException in the question.
            var topDocs = searcher.Search(query, null, MaxHits, sort);

            var creationDates = new List<string>();
            foreach (var hit in topDocs.ScoreDocs)
            {
                var doc = searcher.Doc(hit.Doc);
                creationDates.Add(doc.GetField(CreationDateField).StringValue);
            }

            return creationDates;
        }

        /// <summary>
        /// Creates an IndexWriter over a new RAMDirectory using a StandardAnalyzer.
        /// </summary>
        /// <returns>The writer; the caller is responsible for disposing it.</returns>
        private IndexWriter CreateIndex()
        {
            var directory = new RAMDirectory();
            var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
            return new IndexWriter(directory, analyzer, new IndexWriter.MaxFieldLength(1000));
        }

        /// <summary>
        /// Adds one document containing every key/value pair as a stored, analyzed text field,
        /// plus a stored, indexed numeric "creationDate" field set to the current UTC ticks.
        /// </summary>
        /// <param name="writer">Writer that receives the document.</param>
        /// <param name="values">Field names mapped to their values; values are lower-cased before indexing.</param>
        private void Add(IndexWriter writer, IDictionary<string, string> values)
        {
            var document = new Document();
            foreach (var kvp in values)
            {
                document.Add(new Field(kvp.Key, kvp.Value.ToLowerInvariant(), Field.Store.YES, Field.Index.ANALYZED));
            }

            document.Add(new NumericField(CreationDateField, Field.Store.YES, true).SetLongValue(DateTime.UtcNow.Ticks));
            writer.AddDocument(document);
        }
    }
}
在尝试对日期时间(长)数字字段进行排序时,我总是遇到 FormatException。
When converting a string to DateTime, parse the string to take the date before putting each variable into the DateTime object.
添加数字字段:
// Index the creation time (UTC ticks) as a stored, indexed numeric long field.
// The original snippet was missing the closing parenthesis of doc.Add(...).
doc.Add(new NumericField("creationDate", Field.Store.YES, true)
    .SetLongValue(DateTime.UtcNow.Ticks));
添加排序:
// boolean query
// Sort on the "creationDate" field, interpreting its values as longs.
var sortField = new SortField("creationDate", SortField.LONG, true);
var inverseSort = new Sort(sortField);
// Ask the searcher for up to 100 hits ordered by that sort.
var results = searcher.Search(query, null, 100, inverseSort); // exception thrown here
检查索引,我可以验证 'creationDate' 字段正在存储 "long" 值。什么可能导致此异常?
编辑:
查询
// Build a query that requires every incoming property to match:
// one MUST term clause per key / lower-cased value pair.
var query = new BooleanQuery();
foreach (var termQuery in incomingProps.Select(kvp => new TermQuery(new Term(kvp.Key, kvp.Value.ToLowerInvariant()))))
{
    query.Add(new BooleanClause(termQuery, Occur.MUST));
}
return query;
版本:Lucene.Net3.0.3
更新:
此问题再次出现,现在是 INT 值。 我下载了 Lucene.Net 源代码并调试了问题。
问题出在 FieldCache 的某处:它试图将值“`\b\0\0\0”解析为整数,这看起来有点奇怪。
我将这些值添加为数字字段:
// Index the version as a stored, indexed numeric int field.
// NOTE(review): the second argument (int.MaxValue) looks like the precision step — confirm
// against the NumericField constructor and check that it matches what the sort expects.
doc.Add(new NumericField(VersionNum, int.MaxValue, Field.Store.YES,
true).SetIntValue(VersionValue));
当我本应至少得到 1 条命中结果时,却得到了异常。 检查索引后,我看到该字段的词项(term)如下:
字段文本为:
编辑:
我硬编码了一个 int 值并添加了几个段:
// Hard-coded experiment: index the prefix-coded string form of the int 1 as a plain
// stored field that is not analyzed and has norms omitted.
doc.Add(new Field(VersionNum, NumericUtils.IntToPrefixCoded(1), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
这导致将版本字段存储为:
而且,当我尝试排序时,出现解析错误:
// Sort on the version field, interpreting its terms as ints.
// NOTE(review): the trailing 'true' presumably requests reverse order — confirm against SortField.
var sortVersion = new SortField(VersionNum, SortField.INT, true);
每次出现异常时,Lucene 都在试图解析“\b\0\0\0”。 查看以字符串形式存储的前缀编码,我猜 1 会被转换为“ \b\0\0\0\1 ”?
Lucene 可能会在 FieldCache 中留下一些垃圾吗?
这是一个单元测试,它试图捕捉您的要求。测试通过。你能解释一下你的代码有什么不同吗? (发布完整的失败测试将帮助我们了解您在做什么:-))
using System;
using System.Linq;
using System.Collections.Generic;
using Microsoft.VisualStudio.TestTools.UnitTesting;
using Lucene.Net.Search;
using Lucene.Net.Index;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.QueryParsers;
using Lucene.Net.Documents;
using Lucene.Net.Store;
namespace SO_answers
{
    /// <summary>
    /// Self-contained attempt to reproduce the FormatException reported when sorting on a
    /// long-valued NumericField: indexes one document in a RAMDirectory, searches it with a
    /// BooleanQuery and sorts the hits on the "creationDate" field.
    /// </summary>
    [TestClass]
    public class UnitTest1
    {
        /// <summary>Name of the numeric field that holds the creation time in UTC ticks.</summary>
        private const string CreationDateField = "creationDate";

        /// <summary>Maximum number of hits requested from the searcher.</summary>
        private const int MaxHits = 100;

        /// <summary>
        /// Indexes a single document and verifies that a sorted search for it returns exactly one hit.
        /// </summary>
        [TestMethod]
        public void TestShopping()
        {
            var item = new Dictionary<string, string>
            {
                { "field1", "value1" },
                { "field2", "value2" },
                { "field3", "value3" }
            };

            // using blocks guarantee the writer, reader and searcher are released even when
            // the assertion (or the search itself) throws.
            using (var writer = CreateIndex())
            {
                Add(writer, item);
                writer.Flush(true, true, true);

                using (var reader = writer.GetReader())
                using (var searcher = new IndexSearcher(reader))
                {
                    var result = Search(searcher, item);
                    Assert.AreEqual(1, result.Count);
                }
            }
        }

        /// <summary>
        /// Searches for documents in which every key/value pair matches, one MUST term clause per pair.
        /// </summary>
        /// <param name="searcher">Searcher over the index under test.</param>
        /// <param name="values">Field names mapped to the values to match; values are lower-cased before querying.</param>
        /// <returns>The stored "creationDate" string of each hit, in sorted order.</returns>
        private List<string> Search(IndexSearcher searcher, Dictionary<string, string> values)
        {
            var query = new BooleanQuery();
            foreach (var kvp in values)
            {
                var termQuery = new TermQuery(new Term(kvp.Key, kvp.Value.ToLowerInvariant()));
                query.Add(new BooleanClause(termQuery, Occur.MUST));
            }

            return Search(searcher, query);
        }

        /// <summary>
        /// Runs <paramref name="query"/> sorted on the "creationDate" field as a long.
        /// </summary>
        /// <param name="searcher">Searcher over the index under test.</param>
        /// <param name="query">Query to execute.</param>
        /// <returns>The stored "creationDate" string of each hit, in sorted order.</returns>
        private List<string> Search(IndexSearcher searcher, Query query)
        {
            var sort = new Sort(new SortField(CreationDateField, SortField.LONG, true));

            // This is the call that threw FormatException in the question.
            var topDocs = searcher.Search(query, null, MaxHits, sort);

            var creationDates = new List<string>();
            foreach (var hit in topDocs.ScoreDocs)
            {
                var doc = searcher.Doc(hit.Doc);
                creationDates.Add(doc.GetField(CreationDateField).StringValue);
            }

            return creationDates;
        }

        /// <summary>
        /// Creates an IndexWriter over a new RAMDirectory using a StandardAnalyzer.
        /// </summary>
        /// <returns>The writer; the caller is responsible for disposing it.</returns>
        private IndexWriter CreateIndex()
        {
            var directory = new RAMDirectory();
            var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
            return new IndexWriter(directory, analyzer, new IndexWriter.MaxFieldLength(1000));
        }

        /// <summary>
        /// Adds one document containing every key/value pair as a stored, analyzed text field,
        /// plus a stored, indexed numeric "creationDate" field set to the current UTC ticks.
        /// </summary>
        /// <param name="writer">Writer that receives the document.</param>
        /// <param name="values">Field names mapped to their values; values are lower-cased before indexing.</param>
        private void Add(IndexWriter writer, IDictionary<string, string> values)
        {
            var document = new Document();
            foreach (var kvp in values)
            {
                document.Add(new Field(kvp.Key, kvp.Value.ToLowerInvariant(), Field.Store.YES, Field.Index.ANALYZED));
            }

            document.Add(new NumericField(CreationDateField, Field.Store.YES, true).SetLongValue(DateTime.UtcNow.Ticks));
            writer.AddDocument(document);
        }
    }
}