Elasticsearch 匹配查询不匹配带撇号的文档
Elasticsearch match query does not match a document with apostrophe
我正在为地点自动完成构建一个搜索器,它是 Google 地图搜索器的一个更简单版本。我使用的查询似乎一切正常:
{
"query": {
"bool": {
"must": {
"multi_match": {
"query": "Ametlla",
"type": "best_fields",
"fields": [
"locality",
"alternative_names"
],
"operator": "and"
}
},
"filter": {
"term": {
"country_code": "ES"
}
}
}
}
}
我发现的问题与西班牙的一个城市有关:L'Ametlla de Mar。
/localities_index/localities/10088
{
"_index": "localities_index",
"_type": "localities",
"_id": "10088",
"_version": 1,
"_seq_no": 133,
"_primary_term": 4,
"found": true,
"_source": {
"country_code": "es",
"locality": "L'Ametlla de Mar",
"alternative_names": []
}
}
使用部分名称 Ametlla 进行搜索时可以匹配到该文档(请参阅以下使用部分名称的示例查询):
{
"query": {
"match": {
"locality": {
"query" : "Ametlla"
}
}
}
}
/localities_index/localities/10088/_explain
{
"_index": "localities_index",
"_type": "localities",
"_id": "10088",
"matched": true,
"explanation": {
"value": 3.3985975,
"description": "weight(locality:ametlla in 2) [PerFieldSimilarity], result of:",
"details": [
{
"value": 3.3985975,
"description": "score(freq=1.0), product of:",
"details": [
{
"value": 2.2,
"description": "boost",
"details": []
},
{
"value": 3.6686769,
"description": "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:",
"details": [
{
"value": 2,
"description": "n, number of documents containing term",
"details": []
},
{
"value": 97,
"description": "N, total number of documents with field",
"details": []
}
]
},
{
"value": 0.4210829,
"description": "tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from:",
"details": [
{
"value": 1.0,
"description": "freq, occurrences of term within document",
"details": []
},
{
"value": 1.2,
"description": "k1, term saturation parameter",
"details": []
},
{
"value": 0.75,
"description": "b, length normalization parameter",
"details": []
},
{
"value": 9.0,
"description": "dl, length of field",
"details": []
},
{
"value": 7.5360823,
"description": "avgdl, average length of field",
"details": []
}
]
}
]
}
]
}
}
但如果使用其全名(L'Ametlla de Mar)进行搜索,则无法匹配。
我试过将 punctuation 添加到 token_chars,正如我在相关资料中看到的那样,但没有成功。所以我又尝试将撇号(')设置为 custom_token_chars,但同样没有效果。
/localities_index/_settings
{
"localities_index": {
"settings": {
"index": {
"number_of_shards": "1",
"provided_name": "localities_index",
"creation_date": "1596537683568",
"analysis": {
"analyzer": {
"autocomplete": {
"filter": [
"lowercase",
"asciifolding"
],
"tokenizer": "autocomplete"
},
"autocomplete_search": {
"filter": [
"lowercase",
"asciifolding"
],
"tokenizer": "lowercase"
}
},
"tokenizer": {
"autocomplete": {
"token_chars": [
"letter",
"digit"
],
"custom_token_chars": "'",
"min_gram": "1",
"type": "edge_ngram",
"max_gram": "15"
}
}
},
"number_of_replicas": "1",
"uuid": "lS3Ork2zSySYJbJYmx29aw",
"version": {
"created": "7040099"
}
}
}
}
}
/localities_index/_mapping
{
"localities_index": {
"mappings": {
"properties": {
"alternative_names": {
"type": "text",
"analyzer": "autocomplete",
"search_analyzer": "autocomplete_search"
},
"country_code": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"locality": {
"type": "text",
"analyzer": "autocomplete",
"search_analyzer": "autocomplete_search"
}
}
}
}
}
您可以在自定义分析器中使用 Apostrophe token filter,并将该分析器应用于包含撇号的字段,然后继续使用您已经在使用的 match 查询;由于 match 查询会使用与索引时相同的分析器,您将得到预期的结果。
我正在为地点自动完成构建一个搜索器,它是 Google 地图搜索器的一个更简单版本。我使用的查询似乎一切正常:
{
"query": {
"bool": {
"must": {
"multi_match": {
"query": "Ametlla",
"type": "best_fields",
"fields": [
"locality",
"alternative_names"
],
"operator": "and"
}
},
"filter": {
"term": {
"country_code": "ES"
}
}
}
}
}
我发现的问题与西班牙的一个城市有关:L'Ametlla de Mar。
/localities_index/localities/10088
{
"_index": "localities_index",
"_type": "localities",
"_id": "10088",
"_version": 1,
"_seq_no": 133,
"_primary_term": 4,
"found": true,
"_source": {
"country_code": "es",
"locality": "L'Ametlla de Mar",
"alternative_names": []
}
}
使用部分名称 Ametlla 进行搜索时可以匹配到该文档(请参阅以下使用部分名称的示例查询):
{
"query": {
"match": {
"locality": {
"query" : "Ametlla"
}
}
}
}
/localities_index/localities/10088/_explain
{
"_index": "localities_index",
"_type": "localities",
"_id": "10088",
"matched": true,
"explanation": {
"value": 3.3985975,
"description": "weight(locality:ametlla in 2) [PerFieldSimilarity], result of:",
"details": [
{
"value": 3.3985975,
"description": "score(freq=1.0), product of:",
"details": [
{
"value": 2.2,
"description": "boost",
"details": []
},
{
"value": 3.6686769,
"description": "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:",
"details": [
{
"value": 2,
"description": "n, number of documents containing term",
"details": []
},
{
"value": 97,
"description": "N, total number of documents with field",
"details": []
}
]
},
{
"value": 0.4210829,
"description": "tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from:",
"details": [
{
"value": 1.0,
"description": "freq, occurrences of term within document",
"details": []
},
{
"value": 1.2,
"description": "k1, term saturation parameter",
"details": []
},
{
"value": 0.75,
"description": "b, length normalization parameter",
"details": []
},
{
"value": 9.0,
"description": "dl, length of field",
"details": []
},
{
"value": 7.5360823,
"description": "avgdl, average length of field",
"details": []
}
]
}
]
}
]
}
}
但如果使用其全名(L'Ametlla de Mar)进行搜索,则无法匹配。
我试过将 punctuation 添加到 token_chars,正如我在相关资料中看到的那样,但没有成功。所以我又尝试将撇号(')设置为 custom_token_chars,但同样没有效果。
/localities_index/_settings
{
"localities_index": {
"settings": {
"index": {
"number_of_shards": "1",
"provided_name": "localities_index",
"creation_date": "1596537683568",
"analysis": {
"analyzer": {
"autocomplete": {
"filter": [
"lowercase",
"asciifolding"
],
"tokenizer": "autocomplete"
},
"autocomplete_search": {
"filter": [
"lowercase",
"asciifolding"
],
"tokenizer": "lowercase"
}
},
"tokenizer": {
"autocomplete": {
"token_chars": [
"letter",
"digit"
],
"custom_token_chars": "'",
"min_gram": "1",
"type": "edge_ngram",
"max_gram": "15"
}
}
},
"number_of_replicas": "1",
"uuid": "lS3Ork2zSySYJbJYmx29aw",
"version": {
"created": "7040099"
}
}
}
}
}
/localities_index/_mapping
{
"localities_index": {
"mappings": {
"properties": {
"alternative_names": {
"type": "text",
"analyzer": "autocomplete",
"search_analyzer": "autocomplete_search"
},
"country_code": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"locality": {
"type": "text",
"analyzer": "autocomplete",
"search_analyzer": "autocomplete_search"
}
}
}
}
}
您可以在自定义分析器中使用 Apostrophe token filter,并将该分析器应用于包含撇号的字段,然后继续使用您已经在使用的 match 查询;由于 match 查询会使用与索引时相同的分析器,您将得到预期的结果。