根据 doc_count 对 Elasticsearch 中的桶进行排序

Ordering the Buckets in ElasticSearch on the basis of doc_count

我是 ElasticSearch 的新手,目前正在尝试编写一个涉及聚合的 ElasticSearch 查询,用它获取按若干字段分组后的前 5 个桶(查询中涉及的字段数量是动态的,可以是 2 到 5 个字段)。

我面临的问题是,团队当前使用的 ElasticSearch 版本不支持多词项(multi_terms)聚合,因此我转而使用复合(composite)聚合,并以 top_hits 作为子聚合,希望这样能得到前 5 个桶。但是,我得到的结果并没有按 doc_count / hits 排序。

请求:

{
  "query": {
    "bool": {
      "must": [
        {
          "exists": { "field": "uuid" }
        },
        {
          "query_string": {
            "query": "*",
            "lowercase_expanded_terms": false
          }
        }
      ]
    }
  },
  "aggs": {
    "test_aggregation": {
      "composite": {
        "sources": [
          { "zipCode": { "terms": { "field": "zipCode" } } },
          { "routeCode": { "terms": { "field": "routeCode" } } }
        ]
      },
      "aggs": {
        "test_aggregation_hits": {
          "top_hits": {
            "size": 5,
            "_source": {
              "includes": ["uuid"]
            }
          }
        }
      }
    }
  },
  "size": 0
}

响应:

{
    "took": 310,
    "timed_out": false,
    "num_reduce_phases": 2,
    "_shards": {
        "total": 140,
        "successful": 140,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": 8400,
        "max_score": 0.0,
        "hits": []
    },
    "aggregations": {
        "test_aggregation": {
            "after_key": {
                "zipCode": "someRandomZipCode",
                "routeCode": "someRandomRouteCode"
            },
            "buckets": [{
                "key": {
                    "zipCode": "someRandomValue-1",
                    "routeCode": "someRandomRouteCode-1"
                },
                "doc_count": 36,
                "test_aggregation_hits": {
                    "hits": {
                        "total": 36,
                        "max_score": 11.5650015,
                        "hits": [{
                          .... // some data
                        }]
                    }
                 }
            }, {
                "key": {
                    "zipCode": "someRandomValue-2",
                    "routeCode": "someRandomRouteCode-2"
                },
                "doc_count": 40,
                "test_aggregation_hits": {
                    "hits": {
                        "total": 40,
                        "max_score": 11.5658015,
                        "hits": [{
                          .... // some data
                        }]
                    }
                 }
            }, {
                "key": {
                    "zipCode": "someRandomValue-3",
                    "routeCode": "someRandomRouteCode-3"
                },
                "doc_count": 13,
                "test_aggregation_hits": {
                    "hits": {
                        "total": 13,
                        "max_score": 11.5750015,
                        "hits": [{
                          .... // some data
                        }]
                    }
                 }
            }
            ....  
            ....
          ]
        }
      }
}
  1. 我想知道我采用的方法是否正确?
  2. 如果方法不正确,我应该如何解决这个问题?
  3. 如果方法是正确的,那么我哪里做错了,又该如何根据 doc_count 对桶进行排序?

我通过使用 ElasticSearch 的桶排序(bucket_sort)聚合解决了这个问题。

最终的查询如下。需要注意的是,bucket_sort 只会对父聚合本次返回的桶(即复合聚合当前这一页的桶)进行排序,而不是对全部桶做全局排序。

{
  "query": {
    "bool": {
      "must": [
        {
          "exists": { "field": "uuid" }
        },
        {
          "query_string": {
            "query": "*",
            "lowercase_expanded_terms": false
          }
        }
      ]
    }
  },
  "aggs": {
    "test_aggregation": {
      "composite": {
        "sources": [
          { "zipCode": { "terms": { "field": "zipCode" } } },
          { "routeCode": { "terms": { "field": "routeCode" } } }
        ],
        "size": 100
      },
      "aggs": {
        "test_aggregation_hits": {
          "top_hits": {
            "size": 25,
            "_source": {
              "includes": ["uuid"]
            }
          }
        },
        "sort_buckets": {
          "bucket_sort": {
            "size": 10,
            "sort": [
              { "_count": { "order": "desc" } }
            ]
          }
        }
      }
    }
  },
  "size": 0
}