Elasticsearch 排序脚本仅对少数文档无法按预期工作

Elasticsearch sort script not working as expected for few documents only

考虑这样一个查询:

{
  "size": 200, 
  "query": {
     "bool" : {
     ....
     }
   },

   "sort": {
      "_script" : {
        "script" : {
         "source" : "params._source.participants[0].participantEmail",
         "lang" : "painless"
       },
       "type" : "string",
       "order" : "desc"
     }
   }
}

此查询对几乎所有文档都能正常工作,但其中少数文档没有排在正确的位置。这怎么可能?

最后几个文档的顺序如下(这里只显示每个文档 participants 数组的第一项):

shiend@....
denys@...
Lynn@...

这怎么可能?我毫无头绪。是排序查询写错了吗?

设置:

 "myindex" : {
    "settings" : {
      "index" : {
        "refresh_interval" : "30s",
        "number_of_shards" : "5",
        "provided_name" : "myindex",
        "creation_date" : "1600703588497",
        "analysis" : {
          "filter" : {
            "english_keywords" : {
              "keywords" : [
                "example"
              ],
              "type" : "keyword_marker"
            },
            "english_stemmer" : {
              "type" : "stemmer",
              "language" : "english"
            },
            "synonym" : {
              "type" : "synonym",
              "synonyms_path" : "analysis/UK_US_Sync_2.csv",
              "updateable" : "true"
            },
            "english_possessive_stemmer" : {
              "type" : "stemmer",
              "language" : "possessive_english"
            },
            "english_stop" : {
              "type" : "stop",
              "stopwords" : "_english_"
            },
            "my_katakana_stemmer" : {
              "type" : "kuromoji_stemmer",
              "minimum_length" : "4"
            }
          },
          "normalizer" : {
            "custom_normalizer" : {
              "filter" : [
                "lowercase",
                "asciifolding"
              ],
              "type" : "custom",
              "char_filter" : [ ]
            }
          },
          "analyzer" : {
            "somevar_english" : {
              "filter" : [
                "english_possessive_stemmer",
                "lowercase",
                "english_stop",
                "english_keywords",
                "english_stemmer",
                "asciifolding",
                "synonym"
              ],
              "tokenizer" : "standard"
            },
            "myvar_chinese" : {
              "filter" : [
                "porter_stem"
              ],
              "tokenizer" : "smartcn_tokenizer"
            },
            "myvar" : {
              "filter" : [
                "my_katakana_stemmer"
              ],
              "tokenizer" : "kuromoji_tokenizer"
            }
          }
        },
        "number_of_replicas" : "1",
        "uuid" : "d0LlBVqIQGSk4afEWFD",
        "version" : {
          "created" : "6081099",
          "upgraded" : "6081299"
        }
      }
    }
  }

映射:

{
    "myindex": {
        "mappings": {
            "doc": {
                "dynamic_date_formats": [
                    "yyyy-MM-dd HH:mm:ss.SSS"
                ],
                "properties": {
                    "all_fields": {
                        "type": "text"
                    },
                    "participants": {
                        "type": "nested",
                        "include_in_root": true,
                        "properties": {
                            "participantEmail": {
                                "type": "keyword",
                                "fields": {
                                    "keyword": {
                                        "type": "keyword",
                                        "ignore_above": 256,
                                        "normalizer": "custom_normalizer"
                                    }
                                },
                                "copy_to": [
                                    "all_fields"
                                ]
                            },
                            "participantType": {
                                "type": "text",
                                "fields": {
                                    "keyword": {
                                        "type": "keyword",
                                        "ignore_above": 256,
                                        "normalizer": "custom_normalizer"
                                    }
                                },
                                "copy_to": [
                                    "all_fields"
                                ]
                            }
                        }
                    }
                }
            }
        }
    }
}

编辑:可能是因为电子邮件 Lynn@.. 以大写字母开头?

确实如此。脚本返回的字符串按字符编码的字典序排序,大写字母排在小写字母之前;因此在降序排列时,以大写字母开头的 Lynn@... 会排在所有以小写字母开头的地址之后。

您可以在脚本中将电子邮件统一转换为小写后再排序:

"sort": {
  "_script" : {
    "script" : {
     "source" : "params._source.participants[0].participantEmail.toLowerCase()",
     "lang" : "painless"
   },
   "type" : "string",
   "order" : "desc"
 }

}