如何计算 Elastic Search 中符合条件的桶数?

How do I count the number of buckets that match a condition in Elastic Search?

我有一组描述用户分数的文档。同一个用户会有多个分数。

我的数据结构如下:

[
  { "user_id" : 3, "score" : 10 },
  { "user_id" : 1, "score" : 20 },
  { "user_id" : 2, "score" : 60 },
  { "user_id" : 1, "score" : 10 },
  ...
]

我正在尝试确定每个用户的最高分数。我使用的 Elasticsearch 查询如下所示:

{
  "size": 0,
  "aggs": {
    "users": {
      "terms": {
        "field": "user_id",
        "size": 9999
      },
      "aggs": {
        "max_score": {
          "max": {
            "field": "score"
          }
        }
      }
    }
  }
}

响应如下所示:

  "aggregations": {
    "users": {
      "buckets": [
        {
          "key": "1",
          "doc_count": 10,
          "max_score": {
            "value": 10
          }
        },
        {
          "key": "2",
          "doc_count": 10,
          "max_score": {
            "value": 20
          }
        },
        ...
      ]
    }
  }
}

如何分别找到满足 max_score > 20、max_score > 50、max_score > 100 的桶数?

有没有办法让响应看起来像下面这样?

  "aggregations": {
    "users": {
      "buckets": [
        {
          "key": "1",
          "doc_count": 10,
          "max_score": {
            "value": 10
          }
        },
        ...
      ],
      "scoresGreaterThan20": {
         "value": 10
      },
      "scoresGreaterThan50": {
         "value": 5
      },
      "scoresGreaterThan100": {
         "value": 2
      }
    }
  }
}

您可以针对所需的每个条件重复相同的 terms 聚合和 max 聚合,并配合 bucket selector 聚合(bucket selector aggregation)来实现您的用例。下面是一个可运行的示例:

索引数据:

  { "user_id" : 3, "score" : 10 }
  { "user_id" : 1, "score" : 20 }
  { "user_id" : 2, "score" : 60 }
  { "user_id" : 1, "score" : 10 }

搜索查询:

执行 bucket selector 聚合之后,您可以使用 stats bucket 聚合(stats bucket aggregation)来获取剩余桶的数量(即结果中的 count 字段)。

{
  "size": 0,
  "aggs": {
    "user_gt20": {
      "terms": {
        "field": "user_id",
        "size": 9999
      },
      "aggs": {
        "max_score": {
          "max": {
            "field": "score"
          }
        },
        "scoresGreaterThan20": {
          "bucket_selector": {
            "buckets_path": {
              "values": "max_score"
            },
            "script": "params.values > 20"
          }
        }
      }
    },
    "user_gt20_count": {
      "stats_bucket": {
        "buckets_path": "user_gt20._count"
      }
    },
    "user_gt50": {
      "terms": {
        "field": "user_id",
        "size": 9999
      },
      "aggs": {
        "max_score": {
          "max": {
            "field": "score"
          }
        },
        "scoresGreaterThan50": {
          "bucket_selector": {
            "buckets_path": {
              "values": "max_score"
            },
            "script": "params.values > 50"
          }
        }
      }
    },
    "user_gt50_count": {
      "stats_bucket": {
        "buckets_path": "user_gt50._count"
      }
    },
    "user_gt100": {
      "terms": {
        "field": "user_id",
        "size": 9999
      },
      "aggs": {
        "max_score": {
          "max": {
            "field": "score"
          }
        },
        "scoresGreaterThan100": {
          "bucket_selector": {
            "buckets_path": {
              "values": "max_score"
            },
            "script": "params.values > 100"
          }
        }
      }
    },
    "user_gt100_count": {
      "stats_bucket": {
        "buckets_path": "user_gt100._count"
      }
    }
  }
}

搜索结果:

 "aggregations": {
    "user_gt100": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": []
    },
    "user_gt20": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": 2,
          "doc_count": 1,
          "max_score": {
            "value": 60.0
          }
        }
      ]
    },
    "user_gt50": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": 2,
          "doc_count": 1,
          "max_score": {
            "value": 60.0
          }
        }
      ]
    },
    "user_gt20_count": {
      "count": 1,            // note this
      "min": 1.0,
      "max": 1.0,
      "avg": 1.0,
      "sum": 1.0
    },
    "user_gt50_count": {
      "count": 1,             // note this
      "min": 1.0,
      "max": 1.0,
      "avg": 1.0,
      "sum": 1.0
    },
    "user_gt100_count": {
      "count": 0,             // note this
      "min": null,
      "max": null,
      "avg": null,
      "sum": 0.0
    }
  }