使用 Elasticsearch 聚合 (aggs) 分组,将某个字段的值合并为一个列表
Grouping with Elasticsearch (aggs) to join a field into a list of values
我有一个包含多种类型的索引。每条记录的数据包含 "Customer ID"、"Device Name"、"url" 等字段。
Elasticsearch 是 v5.6.8。
我最终想要的是:每个 "Customer ID"、"Device Name" 以及文档 _type 值的组合对应一个文档。每个分组对应的这个文档应包含一个 'url' 值的列表,这些值合并到一个名为 'urls' 的字段中。
我尝试了以下方法,但它没有达到我的预期,我不确定还能尝试什么:
GET _search
{
"query": {
"bool": {
"must": [
{
"term": {
"_index": "safebrowsing"
}
},
{
"range": {
"eventtime": {
"gte": "now-5d/d"
}
}
}
],
"must_not": [
{
"term": {
"reported_to_client": true
}
}
]
}
},
"size": 0,
"aggs": {
"Customer ID": {
"terms": {
"field": "Customer ID.keyword"
},
"aggs": {
"Device Name": {
"terms": {
"field": "Device Name.keyword"
},
"aggs": {
"documenttype": {
"terms": {
"field": "_type"
},
"aggs": {
"urls": {
"terms": {
"script": "_doc['url'].values"
}
}
}
}
}
}
}
}
}
}
这是我得到的错误:
{
"error": {
"root_cause": [
{
"type": "circuit_breaking_exception",
"reason": "[script] Too many dynamic script compilations within one minute, max: [15/min]; please use on-disk, indexed, or scripts with parameters instead; this limit can be changed by the [script.max_compilations_per_minute] setting",
"bytes_wanted": 0,
"bytes_limit": 0
},
{
"type": "script_exception",
"reason": "compile error",
"script_stack": [
"_doc['url'].values",
"^---- HERE"
],
"script": "_doc['url'].values",
"lang": "painless"
}
],
...etc
我想通了……基本上,需要使用一种名为 top_hits 的聚合类型,它会在每个上层聚合的分桶中返回实际的命中文档(数量由 "size" 指定)。
GET /_search
{
"query": {
"bool": {
"must": [
{"term": {"_index": "safebrowsing"}},
{"range": {"eventtime": {"gte": "now-2d/d"}}}
],
"must_not": [
{"term": {"reported_to_client": true}}
]
}
},
"aggs": {
"Customer ID": {
"terms": {
"field": "Customer ID.keyword"
},
"aggs": {
"Device Name": {
"terms": {
"field": "Device Name.keyword"
},
"aggs": {
"thetype": {
"terms": {
"field": "_type"
},
"aggs": {
"thedocs": {
"top_hits": {
"sort": [{"eventtime": {"order": "desc"}}],
"_source": {
"includes": [ "ip", "type", "eventtime", "url" ]
},
"size": 2
}
}
}
}
}
}
}
}
},
"size": 0
}
在我命名为 thedocs 的聚合中,每个命中都如下所示:
{
"_index": "safebrowsing",
"_type": "SOCIAL_ENGINEERING",
"_id": "7ffe641xxxyyydc3536189ce33d5dfb9",
"_score": null,
"_source": {
"ip": "xxx.xxx.7.88",
"eventtime": "2018-05-08T23:34:03-07:00",
"type": "SOCIAL_ENGINEERING",
"url": "http://xyz-domainname.tld/bankofwhatever/"
},
"sort": [
1525847643000
]
}
我有一个包含多种类型的索引。每条记录的数据包含 "Customer ID"、"Device Name"、"url" 等字段。
Elasticsearch 是 v5.6.8。
我最终想要的是:每个 "Customer ID"、"Device Name" 以及文档 _type 值的组合对应一个文档。每个分组对应的这个文档应包含一个 'url' 值的列表,这些值合并到一个名为 'urls' 的字段中。
我尝试了以下方法,但它没有达到我的预期,我不确定还能尝试什么:
GET _search
{
"query": {
"bool": {
"must": [
{
"term": {
"_index": "safebrowsing"
}
},
{
"range": {
"eventtime": {
"gte": "now-5d/d"
}
}
}
],
"must_not": [
{
"term": {
"reported_to_client": true
}
}
]
}
},
"size": 0,
"aggs": {
"Customer ID": {
"terms": {
"field": "Customer ID.keyword"
},
"aggs": {
"Device Name": {
"terms": {
"field": "Device Name.keyword"
},
"aggs": {
"documenttype": {
"terms": {
"field": "_type"
},
"aggs": {
"urls": {
"terms": {
"script": "_doc['url'].values"
}
}
}
}
}
}
}
}
}
}
这是我得到的错误:
{
"error": {
"root_cause": [
{
"type": "circuit_breaking_exception",
"reason": "[script] Too many dynamic script compilations within one minute, max: [15/min]; please use on-disk, indexed, or scripts with parameters instead; this limit can be changed by the [script.max_compilations_per_minute] setting",
"bytes_wanted": 0,
"bytes_limit": 0
},
{
"type": "script_exception",
"reason": "compile error",
"script_stack": [
"_doc['url'].values",
"^---- HERE"
],
"script": "_doc['url'].values",
"lang": "painless"
}
],
...etc
我想通了……基本上,需要使用一种名为 top_hits 的聚合类型,它会在每个上层聚合的分桶中返回实际的命中文档(数量由 "size" 指定)。
GET /_search
{
"query": {
"bool": {
"must": [
{"term": {"_index": "safebrowsing"}},
{"range": {"eventtime": {"gte": "now-2d/d"}}}
],
"must_not": [
{"term": {"reported_to_client": true}}
]
}
},
"aggs": {
"Customer ID": {
"terms": {
"field": "Customer ID.keyword"
},
"aggs": {
"Device Name": {
"terms": {
"field": "Device Name.keyword"
},
"aggs": {
"thetype": {
"terms": {
"field": "_type"
},
"aggs": {
"thedocs": {
"top_hits": {
"sort": [{"eventtime": {"order": "desc"}}],
"_source": {
"includes": [ "ip", "type", "eventtime", "url" ]
},
"size": 2
}
}
}
}
}
}
}
}
},
"size": 0
}
在我命名为 thedocs 的聚合中,每个命中都如下所示:
{
"_index": "safebrowsing",
"_type": "SOCIAL_ENGINEERING",
"_id": "7ffe641xxxyyydc3536189ce33d5dfb9",
"_score": null,
"_source": {
"ip": "xxx.xxx.7.88",
"eventtime": "2018-05-08T23:34:03-07:00",
"type": "SOCIAL_ENGINEERING",
"url": "http://xyz-domainname.tld/bankofwhatever/"
},
"sort": [
1525847643000
]
}