Elasticsearch 6.4 does not take search_analyzer into account in fields
I am having a problem with an Elasticsearch mapping.
The mapping for the field name, for example, is:
{
  "name": {
    "type": "keyword",
    "fields": {
      "ngram": {
        "type": "text",
        "analyzer": "ngram_analyzer",
        "search_analyzer": "ngram_analyzer"
      },
      "word": {
        "type": "text",
        "analyzer": "word_analyzer",
        "search_analyzer": "word_analyzer"
      }
    }
  }
}
The whole mapping works except for the search_analyzer, which Elasticsearch seems to ignore.
Analysis settings:
{
  "analysis": {
    "analyzer": {
      "ngram_analyzer": {
        "type": "custom",
        "char_filter": [
          "number_char_filter_map",
          "remove_duplicates"
        ],
        "tokenizer": "ngram_tokenizer_whitespace",
        "filter": [
          "lowercase",
          "english_stop"
        ]
      },
      "word_analyzer": {
        "type": "custom",
        "char_filter": [
          "number_char_filter_map",
          "remove_duplicates"
        ],
        "tokenizer": "word_tokenizer",
        "filter": [
          "lowercase",
          "english_stop"
        ]
      }
    },
    "char_filter": {
      "remove_duplicates": {
        "type": "pattern_replace",
        "pattern": "(.)(?=\\1)",
        "replacement": ""
      },
      "remove_white_spaces": {
        "type": "pattern_replace",
        "pattern": "(\\s)",
        "replacement": ""
      }
    },
    "filter": {
      "english_stop": {
        "type": "stop",
        "ignore_case": true,
        "stopwords": "_english_"
      }
    },
    "tokenizer": {
      "ngram_tokenizer": {
        "type": "ngram",
        "min_gram": 2,
        "max_gram": 7
      },
      "ngram_tokenizer_whitespace": {
        "type": "ngram",
        "min_gram": 2,
        "max_gram": 7,
        "token_chars": [
          "letter",
          "digit",
          "punctuation",
          "symbol"
        ]
      },
      "word_tokenizer": {
        "type": "standard"
      }
    }
  }
}
I did not find any definition of search_analyzer for fields in the Elasticsearch documentation. If this approach does not work, is there an alternative structure for including a search analyzer?
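For reference, this is how I check whether the search_analyzer was actually picked up: I simply read the mapping back from the index (suggestions and doc are just the index and type names I use; adjust them for your own index):
GET /suggestions/_mapping/doc HTTP/1.1
Host: localhost:9200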
Problem solved after a few days...
The problem was using the same analyzer as the search_analyzer. I simply defined another analyzer with a different name in the settings (using the same key for both analyzer and search_analyzer seems to cause problems, and the search_analyzer gets ignored).
Cloned ngram_analyzer to ngram_search_analyzer.
Cloned word_analyzer to word_search_analyzer.
Create-index request with the mapping:
PUT /suggestions HTTP/1.1
Host: localhost:9200
Content-Type: application/json
{
  "mappings": {
    "doc": {
      "properties": {
        "caption": {
          "type": "keyword",
          "fields": {
            "ngram": {
              "type": "text",
              "analyzer": "ngram_analyzer",
              "search_analyzer": "ngram_search_analyzer"
            },
            "word": {
              "type": "text",
              "analyzer": "word_analyzer",
              "search_analyzer": "word_search_analyzer"
            }
          }
        }
      }
    }
  },
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 1,
    "routing_partition_size": 1,
    "analysis": {
      "analyzer": {
        "ngram_analyzer": {
          "type": "custom",
          "char_filter": [
            "number_char_filter_map",
            "remove_duplicates"
          ],
          "tokenizer": "ngram_tokenizer_whitespace",
          "filter": [
            "lowercase",
            "english_stop"
          ]
        },
        "ngram_search_analyzer": {
          "type": "custom",
          "char_filter": [
            "number_char_filter_map",
            "remove_duplicates"
          ],
          "tokenizer": "ngram_tokenizer_whitespace",
          "filter": [
            "lowercase",
            "english_stop"
          ]
        },
        "word_analyzer": {
          "type": "custom",
          "char_filter": [
            "number_char_filter_map",
            "remove_duplicates"
          ],
          "tokenizer": "word_tokenizer",
          "filter": [
            "lowercase",
            "english_stop"
          ]
        },
        "word_search_analyzer": {
          "type": "custom",
          "char_filter": [
            "number_char_filter_map",
            "remove_duplicates"
          ],
          "tokenizer": "word_tokenizer",
          "filter": [
            "lowercase",
            "english_stop"
          ]
        }
      },
      "char_filter": {
        "number_char_filter_map": {
          "type": "mapping",
          "mappings": [
            "\u0660 => 0",
            "\u0661 => 1",
            "\u0662 => 2",
            "\u0663 => 3",
            "\u0664 => 4",
            "\u0665 => 5",
            "\u0666 => 6",
            "\u0667 => 7",
            "\u0668 => 8",
            "\u0669 => 9",
            "\u06f0 => 0",
            "\u06f1 => 1",
            "\u06f2 => 2",
            "\u06f3 => 3",
            "\u06f4 => 4",
            "\u06f5 => 5",
            "\u06f6 => 6",
            "\u06f7 => 7",
            "\u06f8 => 8",
            "\u06f9 => 9"
          ]
        },
        "remove_duplicates": {
          "type": "pattern_replace",
          "pattern": "(.)(?=\\1)",
          "replacement": ""
        },
        "remove_white_spaces": {
          "type": "pattern_replace",
          "pattern": "(\\s)",
          "replacement": ""
        }
      },
      "filter": {
        "english_stop": {
          "type": "stop",
          "ignore_case": true,
          "stopwords": "_english_"
        }
      },
      "tokenizer": {
        "ngram_tokenizer": {
          "type": "ngram",
          "min_gram": 2,
          "max_gram": 7
        },
        "ngram_tokenizer_whitespace": {
          "type": "ngram",
          "min_gram": 2,
          "max_gram": 7,
          "token_chars": [
            "letter",
            "digit",
            "punctuation",
            "symbol"
          ]
        },
        "word_tokenizer": {
          "type": "standard",
          "token_chars": []
        }
      }
    }
  }
}
Now I can see the search analyzer in the mapping.
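To double-check what the new search analyzer produces, the _analyze API can also be pointed at it by name; the sample text below is arbitrary, and the Arabic-Indic digits in it should be normalized to 1, 2, 3 by number_char_filter_map before the n-grams are built:
POST /suggestions/_analyze HTTP/1.1
Host: localhost:9200
Content-Type: application/json
{
  "analyzer": "ngram_search_analyzer",
  "text": "caption ١٢٣"
}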
I also think it is a good thing to have separate analyzer and search_analyzer definitions, so they can be customized independently later.
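As a closing sanity check, a query such as the one below (the query string is just an example) should be analyzed with ngram_search_analyzer and word_search_analyzer at search time, because those are the search_analyzer settings of the two sub-fields:
POST /suggestions/_search HTTP/1.1
Host: localhost:9200
Content-Type: application/json
{
  "query": {
    "multi_match": {
      "query": "some caption text",
      "fields": [
        "caption.ngram",
        "caption.word"
      ]
    }
  }
}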