lucene 不区分大小写的排序搜索
lucene case Insensitive sort search
如何在不区分大小写的模式下按多字段排序进行搜索?
我正在使用 Lucene 4.10.4 版本,并使用多字段排序(multi-field sort)对结果进行排序
SortField[] sortFiled = new SortField[2];
sortFiled[0] = new SortField("name", SortField.Type.STRING);
sortFiled[1] = new SortField("country", SortField.Type.STRING);
TopDocs topDocs = indexSearcher.search(query, 10 , new Sort(sortFiled));
它给出了排序结果,但在区分大小写的模式下。我希望它以不区分大小写的模式排序。
// Sort on "name" with the built-in STRING sort type, then on "country" with the
// custom comparator source so that field is ordered by the custom comparator.
SortField[] sortFiled = new SortField[2];
sortFiled[0] = new SortField("name", SortField.Type.STRING);
// The comparator source is an object and must be created with `new`;
// a bare CaseInsensitiveStringComparator() call does not compile.
sortFiled[1] = new SortField("country", new CaseInsensitiveStringComparator());
在 SortField 中使用自定义的 FieldComparatorSource 作为排序字段类型。
在上面的代码中,我们在不区分大小写的模式下对国家字段进行排序。
请参阅下面的自定义 FieldComparatorSource class
/**
 * Comparator source that supplies a {@link CaseIgonreCompare} for the field being sorted.
 */
class CaseInsensitiveStringComparator extends FieldComparatorSource {

    /**
     * Creates the comparator for one sort field.
     *
     * @param fieldname name of the field to sort on; forwarded to the comparator
     * @param numHits   forwarded to the comparator, which sizes its value array with it
     * @param sortPos   not used by this implementation
     * @param reversed  not used by this implementation
     * @return a new {@link CaseIgonreCompare} for {@code fieldname}
     */
    @Override
    public FieldComparator<String> newComparator(
            String fieldname, int numHits, int sortPos, boolean reversed) throws IOException {
        FieldComparator<String> comparator = new CaseIgonreCompare(fieldname, numHits);
        return comparator;
    }
}
/**
 * Field comparator that orders documents by a string field while ignoring case.
 *
 * <p>Per-slot values are kept as decoded strings in an array sized by the
 * {@code numHits} constructor argument; per-document values are read from the
 * terms obtained through {@code FieldCache} for the current reader.
 */
class CaseIgonreCompare extends FieldComparator<String> {
    private final String field;
    private final String[] values;
    private String bottom;
    private String topValue;
    private BinaryDocValues cache;

    /**
     * @param field   name of the field whose values are compared
     * @param numHits number of slots to allocate for collected values
     */
    public CaseIgonreCompare(String field, int numHits) {
        this.field = field;
        this.values = new String[numHits];
    }

    /** Compares the values stored in two slots. */
    @Override
    public int compare(int slot1, int slot2) {
        return compareValues(values[slot1], values[slot2]);
    }

    /** Compares the current bottom value with the value of document {@code doc}. */
    @Override
    public int compareBottom(int doc) throws IOException {
        return compareValues(bottom, readValue(doc));
    }

    /** Compares the current top value with the value of document {@code doc}. */
    @Override
    public int compareTop(int doc) throws IOException {
        return compareValues(topValue, readValue(doc));
    }

    /**
     * Compares two strings ignoring case.
     *
     * <p>The comparison is purely lexicographic: comparing lengths first would
     * place "b" before "aa" and break alphabetical order. A {@code null}
     * argument sorts before any non-null value instead of throwing.
     *
     * @return negative, zero, or positive as {@code first} sorts before, equal to,
     *         or after {@code second}
     */
    @Override
    public int compareValues(String first, String second) {
        if (first == null) {
            return second == null ? 0 : -1;
        }
        if (second == null) {
            return 1;
        }
        return first.compareToIgnoreCase(second);
    }

    /** Stores the value of document {@code doc} into {@code slot}. */
    @Override
    public void copy(int slot, int doc) throws IOException {
        values[slot] = readValue(doc);
    }

    /** Remembers the value held in {@code slot} as the bottom value. */
    @Override
    public void setBottom(int slot) {
        this.bottom = values[slot];
    }

    /** Loads the terms of the sort field for the given reader context. */
    @Override
    public FieldComparator<String> setNextReader(AtomicReaderContext context)
            throws IOException {
        this.cache = FieldCache.DEFAULT.getTerms(context.reader(), field, true);
        return this;
    }

    /** Records the top value used by {@link #compareTop(int)}. */
    @Override
    public void setTopValue(String value) {
        this.topValue = value;
    }

    /** Returns the value stored in {@code slot}. */
    @Override
    public String value(int slot) {
        return values[slot];
    }

    /** Decodes the field value of document {@code doc} from the current reader's terms. */
    private String readValue(int doc) {
        return cache.get(doc).utf8ToString();
    }
}
我需要按照冰岛字母规则 (aábcdðeé....) 对字符串字段进行排序,因此我尝试将代码移植到 c# 并使用 StringComparer.InvariantCultureIgnoreCase 比较器。而且效果很好。
所以,这是 Birbal Singh 代码的 C# 移植版本
CaseInsensitiveStringComparator.cs
/// <summary>
/// Comparer source that supplies a <see cref="CaseIgonreCompare"/> for the field being sorted.
/// </summary>
public class CaseInsensitiveStringComparator : FieldComparerSource
{
    /// <summary>
    /// Creates the comparer for one sort field. Only <paramref name="fieldname"/> and
    /// <paramref name="numHits"/> are forwarded; the other arguments are not used here.
    /// </summary>
    public override FieldComparer NewComparer(string fieldname, int numHits, int sortPos, bool reversed)
        => new CaseIgonreCompare(fieldname, numHits);
}
CaseIgonreCompare.cs
/// <summary>
/// Field comparer that orders documents by a string field using
/// <see cref="StringComparer.InvariantCultureIgnoreCase"/>.
/// Per-slot values are kept as decoded strings; per-document values are read
/// from the terms obtained through the field cache for the current reader.
/// </summary>
public class CaseIgonreCompare : FieldComparer<string>
{
    private readonly string _field;
    private readonly string[] _values;
    private BinaryDocValues _cache;
    private string _bottom;
    private string _topValue;

    /// <param name="field">Name of the field whose values are compared.</param>
    /// <param name="numHits">Number of slots to allocate for collected values.</param>
    public CaseIgonreCompare(string field, int numHits)
    {
        _field = field;
        _values = new string[numHits];
    }

    /// <summary>Returns the value stored in <paramref name="slot"/>.</summary>
    public override IComparable this[int slot] => _values[slot];

    /// <summary>
    /// Compares two strings ignoring case with the invariant culture.
    /// The comparer accepts null arguments, so no length pre-check is done
    /// (the former unused length computation threw on null input).
    /// </summary>
    public override int CompareValues(string first, string second)
    {
        return StringComparer.InvariantCultureIgnoreCase.Compare(first, second);
    }

    /// <summary>Decodes the field value of document <paramref name="doc"/>.</summary>
    private string GetValue(int doc)
    {
        var bytesRef = new BytesRef();
        _cache.Get(doc, bytesRef);
        return bytesRef.Utf8ToString();
    }

    /// <summary>Compares the values stored in two slots.</summary>
    public override int Compare(int slot1, int slot2)
    {
        // Must use the same ordering as CompareBottom/CompareTop; plain
        // string.Compare is case-sensitive and would make the sort inconsistent.
        return CompareValues(_values[slot1], _values[slot2]);
    }

    /// <summary>Compares the bottom value with the value of document <paramref name="doc"/>.</summary>
    public override int CompareBottom(int doc)
    {
        return CompareValues(_bottom, GetValue(doc));
    }

    /// <summary>Compares the top value with the value of document <paramref name="doc"/>.</summary>
    public override int CompareTop(int doc)
    {
        return CompareValues(_topValue, GetValue(doc));
    }

    /// <summary>Stores the value of document <paramref name="doc"/> into <paramref name="slot"/>.</summary>
    public override void Copy(int slot, int doc)
    {
        _values[slot] = GetValue(doc);
    }

    /// <summary>Remembers the value held in <paramref name="slot"/> as the bottom value.</summary>
    public override void SetBottom(int slot)
    {
        _bottom = _values[slot];
    }

    /// <summary>Loads the terms of the sort field for the given reader context.</summary>
    public override FieldComparer SetNextReader(AtomicReaderContext context)
    {
        _cache = FieldCache.DEFAULT.GetTerms(context.AtomicReader, _field, true);
        return this;
    }

    /// <summary>Records the top value; a non-string argument results in null.</summary>
    public override void SetTopValue(object value)
    {
        _topValue = value as string;
    }
}
如何在不区分大小写的模式下按多字段排序进行搜索?
我正在使用 Lucene 4.10.4 版本,并使用多字段排序(multi-field sort)对结果进行排序
SortField[] sortFiled = new SortField[2];
sortFiled[0] = new SortField("name", SortField.Type.STRING);
sortFiled[1] = new SortField("country", SortField.Type.STRING);
TopDocs topDocs = indexSearcher.search(query, 10 , new Sort(sortFiled));
它给出了排序结果,但在区分大小写的模式下。我希望它以不区分大小写的模式排序。
// Sort on "name" with the built-in STRING sort type, then on "country" with the
// custom comparator source so that field is ordered by the custom comparator.
SortField[] sortFiled = new SortField[2];
sortFiled[0] = new SortField("name", SortField.Type.STRING);
// The comparator source is an object and must be created with `new`;
// a bare CaseInsensitiveStringComparator() call does not compile.
sortFiled[1] = new SortField("country", new CaseInsensitiveStringComparator());
在 SortField 中使用自定义的 FieldComparatorSource 作为排序字段类型。 在上面的代码中,我们在不区分大小写的模式下对国家字段进行排序。 请参阅下面的自定义 FieldComparatorSource 类。
/**
 * Comparator source that supplies a {@link CaseIgonreCompare} for the field being sorted.
 */
class CaseInsensitiveStringComparator extends FieldComparatorSource {

    /**
     * Creates the comparator for one sort field.
     *
     * @param fieldname name of the field to sort on; forwarded to the comparator
     * @param numHits   forwarded to the comparator, which sizes its value array with it
     * @param sortPos   not used by this implementation
     * @param reversed  not used by this implementation
     * @return a new {@link CaseIgonreCompare} for {@code fieldname}
     */
    @Override
    public FieldComparator<String> newComparator(
            String fieldname, int numHits, int sortPos, boolean reversed) throws IOException {
        FieldComparator<String> comparator = new CaseIgonreCompare(fieldname, numHits);
        return comparator;
    }
}
/**
 * Field comparator that orders documents by a string field while ignoring case.
 *
 * <p>Per-slot values are kept as decoded strings in an array sized by the
 * {@code numHits} constructor argument; per-document values are read from the
 * terms obtained through {@code FieldCache} for the current reader.
 */
class CaseIgonreCompare extends FieldComparator<String> {
    private final String field;
    private final String[] values;
    private String bottom;
    private String topValue;
    private BinaryDocValues cache;

    /**
     * @param field   name of the field whose values are compared
     * @param numHits number of slots to allocate for collected values
     */
    public CaseIgonreCompare(String field, int numHits) {
        this.field = field;
        this.values = new String[numHits];
    }

    /** Compares the values stored in two slots. */
    @Override
    public int compare(int slot1, int slot2) {
        return compareValues(values[slot1], values[slot2]);
    }

    /** Compares the current bottom value with the value of document {@code doc}. */
    @Override
    public int compareBottom(int doc) throws IOException {
        return compareValues(bottom, readValue(doc));
    }

    /** Compares the current top value with the value of document {@code doc}. */
    @Override
    public int compareTop(int doc) throws IOException {
        return compareValues(topValue, readValue(doc));
    }

    /**
     * Compares two strings ignoring case.
     *
     * <p>The comparison is purely lexicographic: comparing lengths first would
     * place "b" before "aa" and break alphabetical order. A {@code null}
     * argument sorts before any non-null value instead of throwing.
     *
     * @return negative, zero, or positive as {@code first} sorts before, equal to,
     *         or after {@code second}
     */
    @Override
    public int compareValues(String first, String second) {
        if (first == null) {
            return second == null ? 0 : -1;
        }
        if (second == null) {
            return 1;
        }
        return first.compareToIgnoreCase(second);
    }

    /** Stores the value of document {@code doc} into {@code slot}. */
    @Override
    public void copy(int slot, int doc) throws IOException {
        values[slot] = readValue(doc);
    }

    /** Remembers the value held in {@code slot} as the bottom value. */
    @Override
    public void setBottom(int slot) {
        this.bottom = values[slot];
    }

    /** Loads the terms of the sort field for the given reader context. */
    @Override
    public FieldComparator<String> setNextReader(AtomicReaderContext context)
            throws IOException {
        this.cache = FieldCache.DEFAULT.getTerms(context.reader(), field, true);
        return this;
    }

    /** Records the top value used by {@link #compareTop(int)}. */
    @Override
    public void setTopValue(String value) {
        this.topValue = value;
    }

    /** Returns the value stored in {@code slot}. */
    @Override
    public String value(int slot) {
        return values[slot];
    }

    /** Decodes the field value of document {@code doc} from the current reader's terms. */
    private String readValue(int doc) {
        return cache.get(doc).utf8ToString();
    }
}
我需要按照冰岛字母规则 (aábcdðeé....) 对字符串字段进行排序,因此我尝试将代码移植到 c# 并使用 StringComparer.InvariantCultureIgnoreCase 比较器。而且效果很好。
所以,这是 Birbal Singh 代码的 C# 移植版本
CaseInsensitiveStringComparator.cs
/// <summary>
/// Comparer source that supplies a <see cref="CaseIgonreCompare"/> for the field being sorted.
/// </summary>
public class CaseInsensitiveStringComparator : FieldComparerSource
{
    /// <summary>
    /// Creates the comparer for one sort field. Only <paramref name="fieldname"/> and
    /// <paramref name="numHits"/> are forwarded; the other arguments are not used here.
    /// </summary>
    public override FieldComparer NewComparer(string fieldname, int numHits, int sortPos, bool reversed)
        => new CaseIgonreCompare(fieldname, numHits);
}
CaseIgonreCompare.cs
/// <summary>
/// Field comparer that orders documents by a string field using
/// <see cref="StringComparer.InvariantCultureIgnoreCase"/>.
/// Per-slot values are kept as decoded strings; per-document values are read
/// from the terms obtained through the field cache for the current reader.
/// </summary>
public class CaseIgonreCompare : FieldComparer<string>
{
    private readonly string _field;
    private readonly string[] _values;
    private BinaryDocValues _cache;
    private string _bottom;
    private string _topValue;

    /// <param name="field">Name of the field whose values are compared.</param>
    /// <param name="numHits">Number of slots to allocate for collected values.</param>
    public CaseIgonreCompare(string field, int numHits)
    {
        _field = field;
        _values = new string[numHits];
    }

    /// <summary>Returns the value stored in <paramref name="slot"/>.</summary>
    public override IComparable this[int slot] => _values[slot];

    /// <summary>
    /// Compares two strings ignoring case with the invariant culture.
    /// The comparer accepts null arguments, so no length pre-check is done
    /// (the former unused length computation threw on null input).
    /// </summary>
    public override int CompareValues(string first, string second)
    {
        return StringComparer.InvariantCultureIgnoreCase.Compare(first, second);
    }

    /// <summary>Decodes the field value of document <paramref name="doc"/>.</summary>
    private string GetValue(int doc)
    {
        var bytesRef = new BytesRef();
        _cache.Get(doc, bytesRef);
        return bytesRef.Utf8ToString();
    }

    /// <summary>Compares the values stored in two slots.</summary>
    public override int Compare(int slot1, int slot2)
    {
        // Must use the same ordering as CompareBottom/CompareTop; plain
        // string.Compare is case-sensitive and would make the sort inconsistent.
        return CompareValues(_values[slot1], _values[slot2]);
    }

    /// <summary>Compares the bottom value with the value of document <paramref name="doc"/>.</summary>
    public override int CompareBottom(int doc)
    {
        return CompareValues(_bottom, GetValue(doc));
    }

    /// <summary>Compares the top value with the value of document <paramref name="doc"/>.</summary>
    public override int CompareTop(int doc)
    {
        return CompareValues(_topValue, GetValue(doc));
    }

    /// <summary>Stores the value of document <paramref name="doc"/> into <paramref name="slot"/>.</summary>
    public override void Copy(int slot, int doc)
    {
        _values[slot] = GetValue(doc);
    }

    /// <summary>Remembers the value held in <paramref name="slot"/> as the bottom value.</summary>
    public override void SetBottom(int slot)
    {
        _bottom = _values[slot];
    }

    /// <summary>Loads the terms of the sort field for the given reader context.</summary>
    public override FieldComparer SetNextReader(AtomicReaderContext context)
    {
        _cache = FieldCache.DEFAULT.GetTerms(context.AtomicReader, _field, true);
        return this;
    }

    /// <summary>Records the top value; a non-string argument results in null.</summary>
    public override void SetTopValue(object value)
    {
        _topValue = value as string;
    }
}