使用俄语文本分析器搜索不起作用
Search with Russian text analyzer does not work
我有非常简单的 ElasticSearch 模型:
/// <summary>
/// Document model with a single text property, <c>Description</c>, whose mapping attribute names the field "description".
/// </summary>
[ElasticsearchType(RelationName = "example")]
public class ElasticModel
{
// NOTE(review): the analyzer is spelled "Russian" here, while the _analyze request shown in this post uses lowercase "russian" — confirm the casing the server expects.
[Text(Name = "description", Analyzer = "Russian", Index = true, SearchAnalyzer = "Russian")]
public string Description { get; set; }
}
然后我用如下代码初始化我的索引:
/// <summary>
/// Applies the supplied index-settings selector to the create-index descriptor and returns the result of that call.
/// Only <c>Settings</c> is invoked here; no other configuration is added to the descriptor in this method.
/// </summary>
/// <param name="indexDescriptor">Descriptor of the index being created.</param>
/// <param name="selectorOfIndexSettings">Callback that configures the index settings.</param>
/// <returns>The value returned by <c>indexDescriptor.Settings(...)</c>, typed as <see cref="ICreateIndexRequest"/>.</returns>
protected ICreateIndexRequest ConfigureIndex(
    CreateIndexDescriptor indexDescriptor,
    Func<IndexSettingsDescriptor, IPromise<IIndexSettings>> selectorOfIndexSettings)
    => indexDescriptor.Settings(selectorOfIndexSettings);
// Create the index named completeIndexName; the create-index descriptor is configured by ConfigureIndex with the `selector` settings callback.
await _client.Indices.CreateAsync(completeIndexName, indexDescriptor => ConfigureIndex(indexDescriptor, selector));
然后我用如下的值初始化我的模型并尝试搜索:
// Build the sample document that will be indexed.
var document = new ElasticModel
{
    Description = "В Москве все выходные будут дожди"
};

// Index the document into completeIndexName. The request is typed with the document's own type.
var responseDoc = await _client.IndexAsync(new IndexRequest<ElasticModel>(document, completeIndexName));

// Run a query_string search for "выходной" against the same index.
var responseSearch = await _client.SearchAsync<ElasticModel>(s => s
    .Index(completeIndexName)
    .Query(q => q.QueryString(c => c
        .Query("выходной"))));
但结果为空。
当我向我的 Elasticsearch 服务器发出如下请求时:
POST {{ElasticSearchAddress}}/_analyze
{
"analyzer": "russian",
"text": "В Москве все выходные будут дожди"
}
我看到预期结果:
{
"tokens": [
{
"token": "москв",
"start_offset": 2,
"end_offset": 8,
"type": "<ALPHANUM>",
"position": 1
},
{
"token": "выходн",
"start_offset": 13,
"end_offset": 21,
"type": "<ALPHANUM>",
"position": 3
},
{
"token": "будут",
"start_offset": 22,
"end_offset": 27,
"type": "<ALPHANUM>",
"position": 4
},
{
"token": "дожд",
"start_offset": 28,
"end_offset": 33,
"type": "<ALPHANUM>",
"position": 5
}
]
}
谁能解释一下,为什么我从 C# 代码发起的搜索没有使用俄语分析器,也没有返回我预期的结果?
更新:
请求/elastictest100/_search
body:
{
"query": {
"multi_match" : {
"query": "выходные будут",
"fields": [ "description" ],
"analyzer": "russian"
}
}
}
返回给我的结果是:
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": 0.13353139,
"hits": [
{
"_index": "mediadev-elastictest100",
"_type": "_doc",
"_id": "G2FzRnMBhdWoY2X4fmQo",
"_score": 0.13353139,
"_source": {
"description": "В Москве все выходные будут дожди"
}
},
{
"_index": "mediadev-elastictest100",
"_type": "_doc",
"_id": "HGGLRnMBhdWoY2X4AGSV",
"_score": 0.13353139,
"_source": {
"description": "В Москве все выходные будут дожди"
}
},
{
"_index": "mediadev-elastictest100",
"_type": "_doc",
"_id": "HWGMRnMBhdWoY2X4tGSY",
"_score": 0.13353139,
"_source": {
"description": "В Москве все выходные будут дожди"
}
}
]
}
}
而使用如下 body 时:
{
"query": {
"multi_match" : {
"query": "выходной будет",
"fields": [ "description" ],
"analyzer": "russian"
}
}
}
返回给我的结果是:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 0,
"relation": "eq"
},
"max_score": null,
"hits": []
}
}
我不熟悉 Nest 代码,但可以给你一些调试问题的指导。
- 尝试打印最终搜索查询的 JSON,以便您可以使用 REST 搜索端点轻松测试它,以比较您是否正在生成正确的查询。
- match 查询会使用与索引时相同的分析器,而 term 查询不会经过分析,这可能导致此类问题;归根结底,要得到搜索结果,搜索时生成的词元(token)必须与索引时生成的词元相匹配。
最简单的办法是检查搜索请求的 JSON,并通过 ES REST 端点直接查询您的索引,以找出根本原因。
我有非常简单的 ElasticSearch 模型:
/// <summary>
/// Document model with a single text property, <c>Description</c>, whose mapping attribute names the field "description".
/// </summary>
[ElasticsearchType(RelationName = "example")]
public class ElasticModel
{
// NOTE(review): the analyzer is spelled "Russian" here, while the _analyze request shown in this post uses lowercase "russian" — confirm the casing the server expects.
[Text(Name = "description", Analyzer = "Russian", Index = true, SearchAnalyzer = "Russian")]
public string Description { get; set; }
}
然后我用如下代码初始化我的索引:
/// <summary>
/// Applies the supplied index-settings selector to the create-index descriptor and returns the result of that call.
/// Only <c>Settings</c> is invoked here; no other configuration is added to the descriptor in this method.
/// </summary>
/// <param name="indexDescriptor">Descriptor of the index being created.</param>
/// <param name="selectorOfIndexSettings">Callback that configures the index settings.</param>
/// <returns>The value returned by <c>indexDescriptor.Settings(...)</c>, typed as <see cref="ICreateIndexRequest"/>.</returns>
protected ICreateIndexRequest ConfigureIndex(
    CreateIndexDescriptor indexDescriptor,
    Func<IndexSettingsDescriptor, IPromise<IIndexSettings>> selectorOfIndexSettings)
    => indexDescriptor.Settings(selectorOfIndexSettings);
// Create the index named completeIndexName; the create-index descriptor is configured by ConfigureIndex with the `selector` settings callback.
await _client.Indices.CreateAsync(completeIndexName, indexDescriptor => ConfigureIndex(indexDescriptor, selector));
然后我用如下的值初始化我的模型并尝试搜索:
// Build the sample document that will be indexed.
var document = new ElasticModel
{
    Description = "В Москве все выходные будут дожди"
};

// Index the document into completeIndexName. The request is typed with the document's own type.
var responseDoc = await _client.IndexAsync(new IndexRequest<ElasticModel>(document, completeIndexName));

// Run a query_string search for "выходной" against the same index.
var responseSearch = await _client.SearchAsync<ElasticModel>(s => s
    .Index(completeIndexName)
    .Query(q => q.QueryString(c => c
        .Query("выходной"))));
但结果为空。 当我向我的 Elasticsearch 服务器发出如下请求时:
POST {{ElasticSearchAddress}}/_analyze
{
"analyzer": "russian",
"text": "В Москве все выходные будут дожди"
}
我看到预期结果:
{
"tokens": [
{
"token": "москв",
"start_offset": 2,
"end_offset": 8,
"type": "<ALPHANUM>",
"position": 1
},
{
"token": "выходн",
"start_offset": 13,
"end_offset": 21,
"type": "<ALPHANUM>",
"position": 3
},
{
"token": "будут",
"start_offset": 22,
"end_offset": 27,
"type": "<ALPHANUM>",
"position": 4
},
{
"token": "дожд",
"start_offset": 28,
"end_offset": 33,
"type": "<ALPHANUM>",
"position": 5
}
]
}
谁能解释一下,为什么我从 C# 代码发起的搜索没有使用俄语分析器,也没有返回我预期的结果?
更新:
请求/elastictest100/_search body:
{
"query": {
"multi_match" : {
"query": "выходные будут",
"fields": [ "description" ],
"analyzer": "russian"
}
}
}
返回给我的结果是:
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": 0.13353139,
"hits": [
{
"_index": "mediadev-elastictest100",
"_type": "_doc",
"_id": "G2FzRnMBhdWoY2X4fmQo",
"_score": 0.13353139,
"_source": {
"description": "В Москве все выходные будут дожди"
}
},
{
"_index": "mediadev-elastictest100",
"_type": "_doc",
"_id": "HGGLRnMBhdWoY2X4AGSV",
"_score": 0.13353139,
"_source": {
"description": "В Москве все выходные будут дожди"
}
},
{
"_index": "mediadev-elastictest100",
"_type": "_doc",
"_id": "HWGMRnMBhdWoY2X4tGSY",
"_score": 0.13353139,
"_source": {
"description": "В Москве все выходные будут дожди"
}
}
]
}
}
而使用如下 body 时:
{
"query": {
"multi_match" : {
"query": "выходной будет",
"fields": [ "description" ],
"analyzer": "russian"
}
}
}
返回给我的结果是:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 0,
"relation": "eq"
},
"max_score": null,
"hits": []
}
}
我不熟悉 Nest 代码,但可以给你一些调试问题的指导。
- 尝试打印最终搜索查询的 JSON,以便您可以使用 REST 搜索端点轻松测试它,以比较您是否正在生成正确的查询。
- match 查询会使用与索引时相同的分析器,而 term 查询不会经过分析,这可能导致此类问题;归根结底,要得到搜索结果,搜索时生成的词元(token)必须与索引时生成的词元相匹配。
最简单的办法是检查搜索请求的 JSON,并通过 ES REST 端点直接查询您的索引,以找出根本原因。