ElasticSearch 如何管理 ngram 查询中的分数结果?
ElasticSearch how to manage the score result in ngram query?
我的索引 climate_change 中有数百条化学品记录。
我正在使用 ngram 进行搜索,以下是我为该索引使用的设置:
{
"settings": {
"index.max_ngram_diff": 30,
"index": {
"analysis": {
"analyzer": {
"analyzer": {
"tokenizer": "test_ngram",
"filter": [
"lowercase"
]
},
"search_analyzer": {
"tokenizer": "test_ngram",
"filter": [
"lowercase"
]
}
},
"tokenizer": {
"test_ngram": {
"type": "edge_ngram",
"min_gram": 1,
"max_gram": 30,
"token_chars": [
"letter",
"digit"
]
}
}
}
}
}
}
我的主要问题是,如果我尝试执行这样的查询:
GET climate_change/_search?size=1000
{
"query": {
"match": {
"description": {
"query":"oxygen"
}
}
}
}
我会看到很多结果的分数相同,都是 7.381186,这很奇怪:
{
"_index" : "climate_change",
"_type" : "_doc",
"_id" : "XXX",
"_score" : 7.381186,
"_source" : {
"recordtype" : "chemicals",
"description" : "carbon/oxygen"
}
},
{
"_index" : "climate_change",
"_type" : "_doc",
"_id" : "YYY",
"_score" : 7.381186,
"_source" : {
"recordtype" : "chemicals",
"description" : "oxygen"
}
怎么可能?
在上面的示例中,既然我使用了 ngram 并在 description 字段中搜索 oxygen,我期望第二个结果的分数比第一个结果高。
我还尝试过在设置中把分词器的类型指定为“standard”和“whitespace”,但无济于事。
也许是 description 中的“/”字符造成的?
非常感谢!
您还需要在 description 字段的映射中定义分析器。
下面给出一个包含索引数据、映射、搜索查询和搜索结果的可运行示例:
{
"settings": {
"analysis": {
"analyzer": {
"my_analyzer": {
"tokenizer": "test_ngram",
"filter": [
"lowercase"
]
},
"search_analyzer": {
"tokenizer": "test_ngram",
"filter": [
"lowercase"
]
}
},
"tokenizer": {
"test_ngram": {
"type": "edge_ngram",
"min_gram": 1,
"max_gram": 30,
"token_chars": [
"letter",
"digit"
]
}
}
}
},
"mappings": {
"properties": {
"description": {
"type": "text",
"analyzer": "my_analyzer"
}
}
}
}
索引数据:
{
"recordtype": "chemicals",
"description": "carbon/oxygen"
}
{
"recordtype": "chemicals",
"description": "oxygen"
}
搜索查询:
{
"query": {
"match": {
"description": {
"query":"oxygen"
}
}
}
}
搜索结果:
"hits": [
{
"_index": "67180160",
"_type": "_doc",
"_id": "2",
"_score": 0.89246297,
"_source": {
"recordtype": "chemicals",
"description": "oxygen"
}
},
{
"_index": "67180160",
"_type": "_doc",
"_id": "1",
"_score": 0.6651374,
"_source": {
"recordtype": "chemicals",
"description": "carbon/oxygen"
}
}
]
我的索引 climate_change 中有数百条化学品记录。
我正在使用 ngram 进行搜索,以下是我为该索引使用的设置:
{
"settings": {
"index.max_ngram_diff": 30,
"index": {
"analysis": {
"analyzer": {
"analyzer": {
"tokenizer": "test_ngram",
"filter": [
"lowercase"
]
},
"search_analyzer": {
"tokenizer": "test_ngram",
"filter": [
"lowercase"
]
}
},
"tokenizer": {
"test_ngram": {
"type": "edge_ngram",
"min_gram": 1,
"max_gram": 30,
"token_chars": [
"letter",
"digit"
]
}
}
}
}
}
}
我的主要问题是,如果我尝试执行这样的查询:
GET climate_change/_search?size=1000
{
"query": {
"match": {
"description": {
"query":"oxygen"
}
}
}
}
我会看到很多结果的分数相同,都是 7.381186,这很奇怪:
{
"_index" : "climate_change",
"_type" : "_doc",
"_id" : "XXX",
"_score" : 7.381186,
"_source" : {
"recordtype" : "chemicals",
"description" : "carbon/oxygen"
}
},
{
"_index" : "climate_change",
"_type" : "_doc",
"_id" : "YYY",
"_score" : 7.381186,
"_source" : {
"recordtype" : "chemicals",
"description" : "oxygen"
}
怎么可能?在上面的示例中,既然我使用了 ngram 并在 description 字段中搜索 oxygen,我期望第二个结果的分数比第一个结果高。我还尝试过在设置中把分词器的类型指定为“standard”和“whitespace”,但无济于事。也许是 description 中的“/”字符造成的?
非常感谢!
您还需要在 description 字段的映射中定义分析器。
下面给出一个包含索引数据、映射、搜索查询和搜索结果的可运行示例:
{
"settings": {
"analysis": {
"analyzer": {
"my_analyzer": {
"tokenizer": "test_ngram",
"filter": [
"lowercase"
]
},
"search_analyzer": {
"tokenizer": "test_ngram",
"filter": [
"lowercase"
]
}
},
"tokenizer": {
"test_ngram": {
"type": "edge_ngram",
"min_gram": 1,
"max_gram": 30,
"token_chars": [
"letter",
"digit"
]
}
}
}
},
"mappings": {
"properties": {
"description": {
"type": "text",
"analyzer": "my_analyzer"
}
}
}
}
索引数据:
{
"recordtype": "chemicals",
"description": "carbon/oxygen"
}
{
"recordtype": "chemicals",
"description": "oxygen"
}
搜索查询:
{
"query": {
"match": {
"description": {
"query":"oxygen"
}
}
}
}
搜索结果:
"hits": [
{
"_index": "67180160",
"_type": "_doc",
"_id": "2",
"_score": 0.89246297,
"_source": {
"recordtype": "chemicals",
"description": "oxygen"
}
},
{
"_index": "67180160",
"_type": "_doc",
"_id": "1",
"_score": 0.6651374,
"_source": {
"recordtype": "chemicals",
"description": "carbon/oxygen"
}
}
]