Elasticsearch 聚合的聚合
Elasticsearch aggregation of aggregation
我想知道是否有办法实现类似 bucket_selector 的功能,但判断依据是桶的键是否匹配,而不是数值度量。
为了提供更多背景信息,下面是我的用例:
数据样本:
[
{
"@version": "1",
"@timestamp": "2017-04-27T04:28:23.589Z",
"type": "json",
"headers": {
"message": {
"type": "requestactivation"
}
},
"id": "668"
},
{
"@version": "1",
"@timestamp": "2017-04-27T04:32:23.589Z",
"type": "json",
"headers": {
"message": {
"type": "requestactivation"
}
},
"id": "669"
},
{
"@version": "1",
"@timestamp": "2017-04-27T04:30:00.802Z",
"type": "json",
"headers": {
"message": {
"type": "activationrequested"
}
},
"id": "668"
}
]
我想检索最后一个事件类型为 requestactivation 的所有 ID。
我已经有一个聚合可以检索每个 ID 的最后一个事件类型,但我还没有想出如何根据键来过滤桶。
这里是查询:
{
"size": 0,
"query": {
"bool": {
"filter": [
{
"exists": {
"field": "id"
}
},
{
"terms": {
"headers.message.type": [
"requestactivation",
"activationrequested"
]
}
}
]
}
},
"aggs": {
"id": {
"terms": {
"field": "id",
"size": 10000
},
"aggs": {
"latest": {
"max": {
"field": "@timestamp"
}
},
"hmtype": {
"terms": {
"field": "headers.message.type",
"size": 1
}
}
}
}
}
}
这是一个结果样本:
{
"took": 5,
"timed_out": false,
"_shards": {
"total": 3,
"successful": 3,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 0,
"hits": []
},
"aggregations": {
"id": {
"doc_count_error_upper_bound": 3,
"sum_other_doc_count": 46,
"buckets": [
{
"key": "986",
"doc_count": 4,
"hmtype": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 2,
"buckets": [
{
"key": "activationrequested",
"doc_count": 2
}
]
},
"latest": {
"value": 1493238253603,
"value_as_string": "2017-04-26T20:24:13.603Z"
}
},
{
"key": "967",
"doc_count": 2,
"hmtype": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 1,
"buckets": [
{
"key": "requestactivation",
"doc_count": 1
}
]
},
"latest": {
"value": 1493191161242,
"value_as_string": "2017-04-26T07:19:21.242Z"
}
},
{
"key": "554",
"doc_count": 7,
"hmtype": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 5,
"buckets": [
{
"key": "requestactivation",
"doc_count": 5
}
]
},
"latest": {
"value": 1493200196871,
"value_as_string": "2017-04-26T09:49:56.871Z"
}
}
]
}
}
}
所有映射均未分析(关键字)。
目标是将结果减少到只有存储桶中的键为 "requestactivation" 的结果。
无法使用文档计数(doc_count),因为 requestactivation 对同一个 ID 可能出现多次。
我最近才开始接触聚合,所以如果这个问题显得很浅显,还请见谅;我找到的示例似乎都不太符合这个特定的逻辑。
可以考虑在 terms 聚合中使用 include,把 terms 中包含的值"过滤"为仅与请求相关的那些:
{
"size": 0,
"query": {
"bool": {
"filter": [
{
"exists": {
"field": "id"
}
},
{
"terms": {
"headers.message.type": [
"requestactivation",
"activationrequested"
]
}
}
]
}
},
"aggs": {
"id": {
"terms": {
"field": "id",
"size": 10000
},
"aggs": {
"latest": {
"max": {
"field": "@timestamp"
}
},
"hmtype": {
"filter": {
"terms": {
"headers.message.type": [
"requestactivation",
"activationrequested"
]
}
},
"aggs": {
"count_types": {
"cardinality": {
"field": "headers.message.type"
}
}
}
},
"filter_buckets": {
"bucket_selector": {
"buckets_path": {
"totalTypes":"hmtype > count_types"
},
"script": "params.totalTypes == 2"
}
}
}
}
}
}
我想知道是否有办法实现类似 bucket_selector 的功能,但判断依据是桶的键是否匹配,而不是数值度量。
为了提供更多背景信息,下面是我的用例:
数据样本:
[
{
"@version": "1",
"@timestamp": "2017-04-27T04:28:23.589Z",
"type": "json",
"headers": {
"message": {
"type": "requestactivation"
}
},
"id": "668"
},
{
"@version": "1",
"@timestamp": "2017-04-27T04:32:23.589Z",
"type": "json",
"headers": {
"message": {
"type": "requestactivation"
}
},
"id": "669"
},
{
"@version": "1",
"@timestamp": "2017-04-27T04:30:00.802Z",
"type": "json",
"headers": {
"message": {
"type": "activationrequested"
}
},
"id": "668"
}
]
我想检索最后一个事件类型为 requestactivation 的所有 ID。
我已经有一个聚合可以检索每个 ID 的最后一个事件类型,但我还没有想出如何根据键来过滤桶。
这里是查询:
{
"size": 0,
"query": {
"bool": {
"filter": [
{
"exists": {
"field": "id"
}
},
{
"terms": {
"headers.message.type": [
"requestactivation",
"activationrequested"
]
}
}
]
}
},
"aggs": {
"id": {
"terms": {
"field": "id",
"size": 10000
},
"aggs": {
"latest": {
"max": {
"field": "@timestamp"
}
},
"hmtype": {
"terms": {
"field": "headers.message.type",
"size": 1
}
}
}
}
}
}
这是一个结果样本:
{
"took": 5,
"timed_out": false,
"_shards": {
"total": 3,
"successful": 3,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 0,
"hits": []
},
"aggregations": {
"id": {
"doc_count_error_upper_bound": 3,
"sum_other_doc_count": 46,
"buckets": [
{
"key": "986",
"doc_count": 4,
"hmtype": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 2,
"buckets": [
{
"key": "activationrequested",
"doc_count": 2
}
]
},
"latest": {
"value": 1493238253603,
"value_as_string": "2017-04-26T20:24:13.603Z"
}
},
{
"key": "967",
"doc_count": 2,
"hmtype": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 1,
"buckets": [
{
"key": "requestactivation",
"doc_count": 1
}
]
},
"latest": {
"value": 1493191161242,
"value_as_string": "2017-04-26T07:19:21.242Z"
}
},
{
"key": "554",
"doc_count": 7,
"hmtype": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 5,
"buckets": [
{
"key": "requestactivation",
"doc_count": 5
}
]
},
"latest": {
"value": 1493200196871,
"value_as_string": "2017-04-26T09:49:56.871Z"
}
}
]
}
}
}
所有映射均未分析(关键字)。
目标是将结果减少到只有存储桶中的键为 "requestactivation" 的结果。
无法使用文档计数(doc_count),因为 requestactivation 对同一个 ID 可能出现多次。
我最近才开始接触聚合,所以如果这个问题显得很浅显,还请见谅;我找到的示例似乎都不太符合这个特定的逻辑。
可以考虑在 terms 聚合中使用 include,把 terms 中包含的值"过滤"为仅与请求相关的那些:
{
"size": 0,
"query": {
"bool": {
"filter": [
{
"exists": {
"field": "id"
}
},
{
"terms": {
"headers.message.type": [
"requestactivation",
"activationrequested"
]
}
}
]
}
},
"aggs": {
"id": {
"terms": {
"field": "id",
"size": 10000
},
"aggs": {
"latest": {
"max": {
"field": "@timestamp"
}
},
"hmtype": {
"filter": {
"terms": {
"headers.message.type": [
"requestactivation",
"activationrequested"
]
}
},
"aggs": {
"count_types": {
"cardinality": {
"field": "headers.message.type"
}
}
}
},
"filter_buckets": {
"bucket_selector": {
"buckets_path": {
"totalTypes":"hmtype > count_types"
},
"script": "params.totalTypes == 2"
}
}
}
}
}
}