字段提升(Field Boosting)在 Lucene.net 中不起作用/没有效果
Field Boosting Doesn't Work/Effect Lucene.net
我正在尝试在文档字段上设置提升(boost),以使搜索结果更准确,但就我所见,它目前还不起作用。
这是我的代码:
Indexing:
/// <summary>
/// Replaces the index entry for one article: deletes every document whose "Id" term
/// matches the article id, then adds a freshly built document through the writer.
/// </summary>
/// <param name="Datafile">Wrapper whose <c>article</c> supplies the id, content, title and tags.</param>
/// <param name="writer">Index writer that receives the delete and the new document.</param>
private static void _addToLuceneIndex(Datafile Datafile, IndexWriter writer)
{
    var article = Datafile.article;
    var articleId = article.Id.ToString();

    // Drop the previous entry for this article before re-adding it.
    writer.DeleteDocuments(new TermQuery(new Term("Id", articleId)));

    // Core fields: the id is stored verbatim; content and title are analyzed and boosted.
    var idField = new Field("Id", articleId, Field.Store.YES, Field.Index.NOT_ANALYZED);
    var contentField = new Field("Content", article.Content, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS)
    {
        Boost = 4
    };
    var titleField = new Field("Title", article.Title, Field.Store.YES, Field.Index.ANALYZED)
    {
        Boost = 6
    };

    var document = new Document();
    document.Add(idField);
    document.Add(contentField);
    document.Add(titleField);

    // One "Atid"/"Tagname" field pair per tag attached to the article.
    foreach (var tagLink in article.Article_Tag)
    {
        var tagIdField = new Field("Atid", tagLink.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED);
        var tagNameField = new Field("Tagname", tagLink.Tag.name, Field.Store.YES, Field.Index.ANALYZED)
        {
            Boost = 8
        };
        document.Add(tagIdField);
        document.Add(tagNameField);
    }

    // Hand the finished document to the index.
    writer.AddDocument(document);
}
我已经用 Luke.Net 查看了字段是否被提升,但并没有,提升值仍然等于 1.0
所以我尝试运行并测试它,但结果还是让我失望
这是我的搜索代码:
searching:
/// <summary>
/// Searches the "Title", "Content" and "Tagname" fields with both an English and an Arabic
/// Snowball analyzer and returns up to 1000 hits mapped to <c>Datafile</c> records.
/// Hits are sorted by score first, then by the Title, Tagname and Content sort fields.
/// </summary>
/// <param name="searchQuery">Raw user query; null, empty or wildcard-only input yields an empty list.</param>
/// <param name="searchField">Not used by this method; kept so existing callers keep compiling.</param>
/// <returns>The mapped hits, each with its <c>score</c> set from the hit score.</returns>
private static IEnumerable<Datafile> _search(string searchQuery, string searchField = "")
{
    // validation: check for null first, otherwise the Replace calls below would throw
    if (string.IsNullOrEmpty(searchQuery)
        || string.IsNullOrEmpty(searchQuery.Replace("*", "").Replace("?", "")))
    {
        return new List<Datafile>();
    }

    // The reader gets its own using block: a searcher built from an existing reader does not
    // close that reader, so it leaked before. Opened read-only because this method only searches.
    using (var indexReader = IndexReader.Open(Directory, true))
    using (var searcher = new IndexSearcher(indexReader))
    {
        const int hitsLimit = 1000;

        // one parser per language, both over the same set of fields
        var enanalyzer = new SnowballAnalyzer(Version.LUCENE_30, "English");
        var aranalyzer = new SnowballAnalyzer(Version.LUCENE_30, "Arabic");
        string[] fields = new string[] { "Title", "Content", "Tagname" };

        // NOTE(review): per-field query-time boosts (e.g. Title 5, Content 3, Tagname 7) were
        // sketched here but never enabled; they are left off so the current ranking is unchanged.
        var enparser = new MultiFieldQueryParser(Version.LUCENE_30, fields, enanalyzer);
        var arparser = new MultiFieldQueryParser(Version.LUCENE_30, fields, aranalyzer);
        var query = QueryModel(searchQuery, new QueryParser[] { enparser, arparser });

        searcher.SetDefaultFieldSortScoring(true, false);
        var sort = new Sort(
            new SortField(null, SortField.SCORE, false),
            new SortField("Title", SortField.STRING, true),
            new SortField("Tagname", SortField.STRING, true),
            new SortField("Content", SortField.STRING, true));
        TopFieldCollector collector = TopFieldCollector.Create(
            sort,
            hitsLimit,
            false,  // fillFields - not needed, we want score and doc only
            true,   // trackDocScores - need doc and score fields
            true,   // trackMaxScore - related to trackDocScores
            false); // should docs be in docId order?

        searcher.Search(query, collector);
        var hits = collector.TopDocs().ScoreDocs;

        var results = new List<Datafile>();
        foreach (var hit in hits)
        {
            var doc = searcher.Doc(hit.Doc);
            var df = _mapLuceneDocumentToData(doc);
            df.score = hit.Score;
            results.Add(df);
        }

        // No explicit Dispose call: the using blocks dispose the searcher and the reader.
        return results;
    }
}
QueryModel Method:
/// <summary>
/// Wraps the search text in '*' on both sides, parses it with every supplied parser and
/// combines the parsed queries as optional (SHOULD) clauses of one boolean query.
/// </summary>
/// <param name="searchQuery">Text to search for; it is surrounded with wildcards before parsing.</param>
/// <param name="parsers">Parsers to run; each one has <c>AllowLeadingWildcard</c> switched on.</param>
/// <returns>A <c>BooleanQuery</c> holding one SHOULD clause per parser.</returns>
private static Query QueryModel(string searchQuery, QueryParser[] parsers)
{
    string wildcardText = string.Concat("*", searchQuery, "*");
    var combined = new BooleanQuery();

    for (int i = 0; i < parsers.Length; i++)
    {
        QueryParser current = parsers[i];

        // The text now starts with '*', so leading wildcards are enabled on the parser.
        current.AllowLeadingWildcard = true;

        var clause = new BooleanClause(current.Parse(wildcardText), Occur.SHOULD);
        combined.Add(clause);
    }

    return combined;
}
我是 lucene.net 的新手,我喜欢它,但我无法解决这个问题
PS:
我还想实现模糊查询:当用户输入的词有拼写错误时(例如把“俄罗斯城市”拼错),得到的结果应与输入正确拼写“俄罗斯城市”时相同。
我试过 FuzzyQuery 类,但它不管用。是否有必要使用 FuzzyQuery 类?
由于没有人回答我的问题,我自己找到了解决办法:改用搜索时(查询时)的查询提升。这是我的代码:
// Query-time boosting: parse the user text against the "Content" field and weight the result.
var contentParser = new QueryParser(Version.LUCENE_30, "Content", analyzer);
contentParser.AllowLeadingWildcard = true;
// Parse is an instance method call; the stray `new` in front of it did not compile.
var boostedQuery = contentParser.Parse(searchQuery);
boostedQuery.Boost = 7.0f;
return boostedQuery;
如果您想进行 Or、And 搜索,可以使用 BooleanQuery。
我正在尝试在文档字段上设置提升(boost),以使搜索结果更准确,但就我所见,它目前还不起作用。
这是我的代码:
Indexing:
/// <summary>
/// Replaces the index entry for one article: deletes every document whose "Id" term
/// matches the article id, then adds a freshly built document through the writer.
/// </summary>
/// <param name="Datafile">Wrapper whose <c>article</c> supplies the id, content, title and tags.</param>
/// <param name="writer">Index writer that receives the delete and the new document.</param>
private static void _addToLuceneIndex(Datafile Datafile, IndexWriter writer)
{
    var article = Datafile.article;
    var articleId = article.Id.ToString();

    // Drop the previous entry for this article before re-adding it.
    writer.DeleteDocuments(new TermQuery(new Term("Id", articleId)));

    // Core fields: the id is stored verbatim; content and title are analyzed and boosted.
    var idField = new Field("Id", articleId, Field.Store.YES, Field.Index.NOT_ANALYZED);
    var contentField = new Field("Content", article.Content, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS)
    {
        Boost = 4
    };
    var titleField = new Field("Title", article.Title, Field.Store.YES, Field.Index.ANALYZED)
    {
        Boost = 6
    };

    var document = new Document();
    document.Add(idField);
    document.Add(contentField);
    document.Add(titleField);

    // One "Atid"/"Tagname" field pair per tag attached to the article.
    foreach (var tagLink in article.Article_Tag)
    {
        var tagIdField = new Field("Atid", tagLink.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED);
        var tagNameField = new Field("Tagname", tagLink.Tag.name, Field.Store.YES, Field.Index.ANALYZED)
        {
            Boost = 8
        };
        document.Add(tagIdField);
        document.Add(tagNameField);
    }

    // Hand the finished document to the index.
    writer.AddDocument(document);
}
我已经用 Luke.Net 查看了字段是否被提升,但并没有,提升值仍然等于 1.0
所以我尝试运行并测试它,但结果还是让我失望
这是我的搜索代码:
searching:
/// <summary>
/// Searches the "Title", "Content" and "Tagname" fields with both an English and an Arabic
/// Snowball analyzer and returns up to 1000 hits mapped to <c>Datafile</c> records.
/// Hits are sorted by score first, then by the Title, Tagname and Content sort fields.
/// </summary>
/// <param name="searchQuery">Raw user query; null, empty or wildcard-only input yields an empty list.</param>
/// <param name="searchField">Not used by this method; kept so existing callers keep compiling.</param>
/// <returns>The mapped hits, each with its <c>score</c> set from the hit score.</returns>
private static IEnumerable<Datafile> _search(string searchQuery, string searchField = "")
{
    // validation: check for null first, otherwise the Replace calls below would throw
    if (string.IsNullOrEmpty(searchQuery)
        || string.IsNullOrEmpty(searchQuery.Replace("*", "").Replace("?", "")))
    {
        return new List<Datafile>();
    }

    // The reader gets its own using block: a searcher built from an existing reader does not
    // close that reader, so it leaked before. Opened read-only because this method only searches.
    using (var indexReader = IndexReader.Open(Directory, true))
    using (var searcher = new IndexSearcher(indexReader))
    {
        const int hitsLimit = 1000;

        // one parser per language, both over the same set of fields
        var enanalyzer = new SnowballAnalyzer(Version.LUCENE_30, "English");
        var aranalyzer = new SnowballAnalyzer(Version.LUCENE_30, "Arabic");
        string[] fields = new string[] { "Title", "Content", "Tagname" };

        // NOTE(review): per-field query-time boosts (e.g. Title 5, Content 3, Tagname 7) were
        // sketched here but never enabled; they are left off so the current ranking is unchanged.
        var enparser = new MultiFieldQueryParser(Version.LUCENE_30, fields, enanalyzer);
        var arparser = new MultiFieldQueryParser(Version.LUCENE_30, fields, aranalyzer);
        var query = QueryModel(searchQuery, new QueryParser[] { enparser, arparser });

        searcher.SetDefaultFieldSortScoring(true, false);
        var sort = new Sort(
            new SortField(null, SortField.SCORE, false),
            new SortField("Title", SortField.STRING, true),
            new SortField("Tagname", SortField.STRING, true),
            new SortField("Content", SortField.STRING, true));
        TopFieldCollector collector = TopFieldCollector.Create(
            sort,
            hitsLimit,
            false,  // fillFields - not needed, we want score and doc only
            true,   // trackDocScores - need doc and score fields
            true,   // trackMaxScore - related to trackDocScores
            false); // should docs be in docId order?

        searcher.Search(query, collector);
        var hits = collector.TopDocs().ScoreDocs;

        var results = new List<Datafile>();
        foreach (var hit in hits)
        {
            var doc = searcher.Doc(hit.Doc);
            var df = _mapLuceneDocumentToData(doc);
            df.score = hit.Score;
            results.Add(df);
        }

        // No explicit Dispose call: the using blocks dispose the searcher and the reader.
        return results;
    }
}
QueryModel Method:
/// <summary>
/// Wraps the search text in '*' on both sides, parses it with every supplied parser and
/// combines the parsed queries as optional (SHOULD) clauses of one boolean query.
/// </summary>
/// <param name="searchQuery">Text to search for; it is surrounded with wildcards before parsing.</param>
/// <param name="parsers">Parsers to run; each one has <c>AllowLeadingWildcard</c> switched on.</param>
/// <returns>A <c>BooleanQuery</c> holding one SHOULD clause per parser.</returns>
private static Query QueryModel(string searchQuery, QueryParser[] parsers)
{
    string wildcardText = string.Concat("*", searchQuery, "*");
    var combined = new BooleanQuery();

    for (int i = 0; i < parsers.Length; i++)
    {
        QueryParser current = parsers[i];

        // The text now starts with '*', so leading wildcards are enabled on the parser.
        current.AllowLeadingWildcard = true;

        var clause = new BooleanClause(current.Parse(wildcardText), Occur.SHOULD);
        combined.Add(clause);
    }

    return combined;
}
我是 lucene.net 的新手,我喜欢它,但我无法解决这个问题
PS:
我还想实现模糊查询:当用户输入的词有拼写错误时(例如把“俄罗斯城市”拼错),得到的结果应与输入正确拼写“俄罗斯城市”时相同。
我试过 FuzzyQuery 类,但它不管用。是否有必要使用 FuzzyQuery 类?
由于没有人回答我的问题,我自己找到了解决办法:改用搜索时(查询时)的查询提升。这是我的代码:
// Query-time boosting: parse the user text against the "Content" field and weight the result.
var contentParser = new QueryParser(Version.LUCENE_30, "Content", analyzer);
contentParser.AllowLeadingWildcard = true;
// Parse is an instance method call; the stray `new` in front of it did not compile.
var boostedQuery = contentParser.Parse(searchQuery);
boostedQuery.Boost = 7.0f;
return boostedQuery;
如果您想进行 Or、And 搜索,可以使用 BooleanQuery。