Elasticsearch聚合之聚合

Elasticsearch aggregation of aggregation

我想知道是否有办法实现类似 bucket_selector 的功能,但筛选桶的依据是键(key)的匹配,而不是数值型指标。

为了提供更多背景信息,这是我的用例:

数据样本:

[
  {
    "@version": "1",
    "@timestamp": "2017-04-27T04:28:23.589Z",
    "type": "json",
    "headers": {
      "message": {
        "type": "requestactivation"
      }
    },
    "id": "668"
  },
  {
    "@version": "1",
    "@timestamp": "2017-04-27T04:32:23.589Z",
    "type": "json",
    "headers": {
      "message": {
        "type": "requestactivation"
      }
    },
    "id": "669"
  },
  {
    "@version": "1",
    "@timestamp": "2017-04-27T04:30:00.802Z",
    "type": "json",
    "headers": {
      "message": {
        "type": "activationrequested"
      }
    },
    "id": "668"
  }
]

我想检索出所有“最新一条事件的类型为 requestactivation”的 ID。

我已经有一个聚合可以检索每个 ID 的最后一个事件类型,但我还没有想出如何根据键来过滤桶。

这里是查询:

{
  "size": 0,
  "query": {
    "bool": {
      "filter": [
        {
          "exists": {
            "field": "id"
          }
        },
        {
          "terms": {
            "headers.message.type": [
              "requestactivation",
              "activationrequested"
            ]
          }
        }
      ]
    }
  },
  "aggs": {
    "id": {
      "terms": {
        "field": "id",
        "size": 10000
      },
      "aggs": {
        "latest": {
          "max": {
            "field": "@timestamp"
          }
        },
        "hmtype": {
          "terms": {
            "field": "headers.message.type",
            "size": 1
          }
        }
      }
    }
  }
}

这是一个结果样本:

{
  "took": 5,
  "timed_out": false,
  "_shards": {
    "total": 3,
    "successful": 3,
    "failed": 0
  },
  "hits": {
    "total": 3,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "id": {
      "doc_count_error_upper_bound": 3,
      "sum_other_doc_count": 46,
      "buckets": [
        {
          "key": "986",
          "doc_count": 4,
          "hmtype": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 2,
            "buckets": [
              {
                "key": "activationrequested",
                "doc_count": 2
              }
            ]
          },
          "latest": {
            "value": 1493238253603,
            "value_as_string": "2017-04-26T20:24:13.603Z"
          }
        },
        {
          "key": "967",
          "doc_count": 2,
          "hmtype": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 1,
            "buckets": [
              {
                "key": "requestactivation",
                "doc_count": 1
              }
            ]
          },
          "latest": {
            "value": 1493191161242,
            "value_as_string": "2017-04-26T07:19:21.242Z"
          }
        },
        {
          "key": "554",
          "doc_count": 7,
          "hmtype": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 5,
            "buckets": [
              {
                "key": "requestactivation",
                "doc_count": 5
              }
            ]
          },
          "latest": {
            "value": 1493200196871,
            "value_as_string": "2017-04-26T09:49:56.871Z"
          }
        }
      ]
    }
  }
}

所有映射均未分析(关键字)。

目标是把结果缩减为只保留 hmtype 桶的键为 "requestactivation" 的那些 ID 桶。

无法使用文档计数(doc_count)来判断,因为同一个 ID 的 requestactivation 事件可能出现多次。

我最近才开始研究聚合,所以如果这个问题看起来很浅显,还请见谅;我能找到的示例似乎都不符合这种特定的逻辑。

回答:可以考虑在 terms 聚合中使用 include,把纳入 terms 的值“过滤”为仅与请求相关的那些(注意:下面给出的查询实际使用的是 filter 聚合、cardinality 聚合和 bucket_selector):

{
  "size": 0,
  "query": {
    "bool": {
      "filter": [
        {
          "exists": {
            "field": "id"
          }
        },
        {
          "terms": {
            "headers.message.type": [
              "requestactivation",
              "activationrequested"
            ]
          }
        }
      ]
    }
  },
  "aggs": {
    "id": {
      "terms": {
        "field": "id",
        "size": 10000
      },
      "aggs": {
        "latest": {
          "max": {
            "field": "@timestamp"
          }
        },
        "hmtype": {
          "filter": {
            "terms": {
              "headers.message.type": [
                "requestactivation",
                "activationrequested"
              ]
            }
          },
          "aggs": {
            "count_types": {
              "cardinality": {
                "field": "headers.message.type"
              }
            }
          }
        },
        "filter_buckets": {
          "bucket_selector": {
            "buckets_path": {
              "totalTypes":"hmtype > count_types"
            },
            "script": "params.totalTypes == 2"
          }
        }
      }
    }
  }
}