在 Elasticsearch 的模糊搜索分析器中忽略特定字符
Ignore specific characters in a fuzzy-search analyzer in Elasticsearch
我在 Elasticsearch 中配置了一个用于模糊搜索的分析器,索引设置和文档如下:
PUT test_index
{
  "settings": {
    "index": {
      "max_ngram_diff": 40
    },
    "analysis": {
      "analyzer": {
        "autocomplete": {
          "tokenizer": "whitespace",
          "filter": ["lowercase", "autocomplete"]
        },
        "autocomplete_search": {
          "tokenizer": "whitespace",
          "filter": ["lowercase"]
        }
      },
      "filter": {
        "autocomplete": {
          "type": "ngram",
          "min_gram": 2,
          "max_gram": 40
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "title": {
        "type": "text",
        "analyzer": "autocomplete",
        "search_analyzer": "autocomplete_search"
      }
    }
  }
}
PUT test_index/_doc/1
{
  "title": "HRT 2018-BN18 N-SB"
}
PUT test_index/_doc/2
{
  "title": "GMC 2019-BN18 A-SB"
}
如何在模糊搜索时忽略连字符('-'),使得搜索 GMC 2019-BN18 A-SB、gmc 2019、gmc 2019-BN18 A-SB 和 GMC 2019-BN18 ASB 都返回同一个文档?
我曾尝试单独创建另一个分析器(如下),但不确定如何在同一个字段上应用多个分析器:
"settings": {
  "analysis": {
    "analyzer": {
      "my_analyzer": {
        "tokenizer": "standard",
        "char_filter": ["my_char_filter"]
      }
    },
    "char_filter": {
      "my_char_filter": {
        "type": "mapping",
        "mappings": ["- => "]
      }
    }
  }
}
您的思路是对的:只需把该字符过滤器(char_filter)同时添加到这两个分析器中,即可确保在索引和搜索时都删除连字符:
PUT test_index
{
  "settings": {
    "index": {
      "max_ngram_diff": 40
    },
    "analysis": {
      "char_filter": {
        "my_char_filter": {
          "type": "mapping",
          "mappings": ["- => "]
        }
      },
      "analyzer": {
        "autocomplete": {
          "char_filter": ["my_char_filter"],
          "tokenizer": "whitespace",
          "filter": ["lowercase", "autocomplete"]
        },
        "autocomplete_search": {
          "char_filter": ["my_char_filter"],
          "tokenizer": "whitespace",
          "filter": ["lowercase"]
        }
      },
      "filter": {
        "autocomplete": {
          "type": "ngram",
          "min_gram": 2,
          "max_gram": 40
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "title": {
        "type": "text",
        "analyzer": "autocomplete",
        "search_analyzer": "autocomplete_search"
      }
    }
  }
}
我在 Elasticsearch 中配置了一个用于模糊搜索的分析器,索引设置和文档如下:
PUT test_index
{
  "settings": {
    "index": {
      "max_ngram_diff": 40
    },
    "analysis": {
      "analyzer": {
        "autocomplete": {
          "tokenizer": "whitespace",
          "filter": ["lowercase", "autocomplete"]
        },
        "autocomplete_search": {
          "tokenizer": "whitespace",
          "filter": ["lowercase"]
        }
      },
      "filter": {
        "autocomplete": {
          "type": "ngram",
          "min_gram": 2,
          "max_gram": 40
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "title": {
        "type": "text",
        "analyzer": "autocomplete",
        "search_analyzer": "autocomplete_search"
      }
    }
  }
}
PUT test_index/_doc/1
{
  "title": "HRT 2018-BN18 N-SB"
}
PUT test_index/_doc/2
{
  "title": "GMC 2019-BN18 A-SB"
}
如何在模糊搜索时忽略连字符('-'),使得搜索 GMC 2019-BN18 A-SB、gmc 2019、gmc 2019-BN18 A-SB 和 GMC 2019-BN18 ASB 都返回同一个文档?
我曾尝试单独创建另一个分析器(如下),但不确定如何在同一个字段上应用多个分析器:
"settings": {
  "analysis": {
    "analyzer": {
      "my_analyzer": {
        "tokenizer": "standard",
        "char_filter": ["my_char_filter"]
      }
    },
    "char_filter": {
      "my_char_filter": {
        "type": "mapping",
        "mappings": ["- => "]
      }
    }
  }
}
您的思路是对的:只需把该字符过滤器(char_filter)同时添加到这两个分析器中,即可确保在索引和搜索时都删除连字符:
PUT test_index
{
  "settings": {
    "index": {
      "max_ngram_diff": 40
    },
    "analysis": {
      "char_filter": {
        "my_char_filter": {
          "type": "mapping",
          "mappings": ["- => "]
        }
      },
      "analyzer": {
        "autocomplete": {
          "char_filter": ["my_char_filter"],
          "tokenizer": "whitespace",
          "filter": ["lowercase", "autocomplete"]
        },
        "autocomplete_search": {
          "char_filter": ["my_char_filter"],
          "tokenizer": "whitespace",
          "filter": ["lowercase"]
        }
      },
      "filter": {
        "autocomplete": {
          "type": "ngram",
          "min_gram": 2,
          "max_gram": 40
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "title": {
        "type": "text",
        "analyzer": "autocomplete",
        "search_analyzer": "autocomplete_search"
      }
    }
  }
}