Elasticsearch post 聚合字符串过滤器
Elasticsearch post aggregation string filter
我有一个系统,其中的设备通过若干网关进行通信,后端再将指标保存到 Elasticsearch 中。
我想知道现在通过特定 gateway_id 通信的传感器。
我有一个这样的映射:
{
"mappings": {
"properties": {
"context": {
"properties": {
"gateway": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"id": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}},
"timeserver": {
"type": "date"
},
"timestamp": {
"type": "date"
},
"type": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"value": {
"type": "double"
}
}
}
}
在网关字段中,以字符串的形式保存了用于每个指标的网关 ID。
我可以通过以下查询获得每台设备的最后一次通信:
GET _search
{
"size": 0,
"aggs": {
"id_agg": {
"terms": {
"field": "context.id.keyword"
, "size": 10000
},
"aggs": {
"group_docs": {
"top_hits": {
"size": 1,
"sort": [
{
"timestamp": {
"order": "desc"
}
}
]
}
}
}
}
},
"query": {
"bool": {
"filter": [
{
"term": {
"_index": "measurements.group.*"
}
}
]
}
}
}
但如何过滤此聚合结果,使其只返回当前正在使用特定网关的传感器?例如添加类似下面的内容:"filter": {"term":{"context.gateway": {"value": "request_gateway_serial" }} },
我查阅了 bucket_selector 聚合和管道聚合,但没有找到可行的方法;在我看来它们只适用于数值,而不适用于像我的网关字段这样的字符串。
查询返回结果示例(每个设备的最近一次通信列表):
"aggregations" : {
{
"key" : "1234",
"context" : {
"gateway" : "123456",
"id" : "1234",
},{
"key" : "12345",
"context" : {
"gateway" : "1234567",
"id" : "12345",
}, {
"key" : "12345678",
"context" : {
"gateway" : "1234567",
"id" : "12345678",
}}
我的预期结果是按 "gateway": "1234567" 过滤,只得到 "key": "12345" 和 "key": "12345678"。
您可以使用 filter aggregation
GET sensors/_search
{
"size": 0,
"aggs": {
"filter_gateway": {
"filter": {
"term": {
"context.gateway.keyword": "request_gateway_serial"
}
},
"aggs": {
"id_agg": {
"terms": {
"field": "context.id.keyword",
"size": 10000
},
"aggs": {
"group_docs": {
"top_hits": {
"size": 1,
"sort": [
{
"timestamp": {
"order": "desc"
}
}
]
}
}
}
}
}
}
},
"query": {
"bool": {
"filter": [
{
"term": {
"_index": "measurements.group.*"
}
}
]
}
}
}
根据您的要求,您还可以在查询部分过滤文档,然后对其进行聚合。
编辑 1:
在下面的查询中,我分别计算了每个设备 ID 下的最大时间戳,
以及仅限给定网关的文档的最大时间戳。如果两个日期相同,说明该设备的最后一次通信经由该网关,bucket_selector 就会保留这个设备 ID。
例如。
查询:
GET sensors/_search
{
"size": 0,
"aggs": {
"id_agg": {
"terms": {
"field": "context.id.keyword",
"size": 10000
},
"aggs": {
"maxDate": {
"max": {
"field": "context.timestamp"
}
},
"Filter": {
"filter": {
"term": {
"context.gateway": "1234568"
}
},
"aggs": {
"filtered_maxdate": {
"max": {
"field": "context.timestamp"
}
}
}
},
"last_geteway_filter": {
"bucket_selector": {
"buckets_path": {
"filtereddate": "Filter>filtered_maxdate",
"maxDate": "maxDate"
},
"script": "params.filtereddate==params.maxDate"
}
}
}
}
}
}
数据:
[
{
"_index" : "sensors",
"_type" : "_doc",
"_id" : "eiZ1pW0BcOVYVz455V6s",
"_score" : 1.0,
"_source" : {
"context.gateway" : "1234567",
"context.id" : 1234,
"context.timestamp" : "2019-10-02"
}
},
{
"_index" : "sensors",
"_type" : "_doc",
"_id" : "eyZ2pW0BcOVYVz45B14T",
"_score" : 1.0,
"_source" : {
"context.gateway" : "1234568",
"context.id" : 1234,
"context.timestamp" : "2019-10-03"
}
},
{
"_index" : "sensors",
"_type" : "_doc",
"_id" : "fCZ2pW0BcOVYVz45Jl6m",
"_score" : 1.0,
"_source" : {
"context.gateway" : "1234569",
"context.id" : 1234,
"context.timestamp" : "2019-10-04"
}
},
{
"_index" : "sensors",
"_type" : "_doc",
"_id" : "fSZ2pW0BcOVYVz45dV48",
"_score" : 1.0,
"_source" : {
"context.gateway" : "1234567",
"context.id" : 1235,
"context.timestamp" : "2019-10-02"
}
},
{
"_index" : "sensors",
"_type" : "_doc",
"_id" : "fiZ2pW0BcOVYVz45l17A",
"_score" : 1.0,
"_source" : {
"context.gateway" : "1234568",
"context.id" : 1235,
"context.timestamp" : "2019-10-03"
}
}
]
}
结果:
设备 1235 的最后一个文档位于网关 1234568 下:
"buckets" : [
{
"key" : "1235",
"doc_count" : 2,
"Filter" : {
"doc_count" : 1,
"filtered_maxdate" : {
"value" : 1.5700608E12,
"value_as_string" : "2019-10-03T00:00:00.000Z"
}
},
"maxDate" : {
"value" : 1.5700608E12,
"value_as_string" : "2019-10-03T00:00:00.000Z"
}
}
]
我有一个系统,其中的设备通过若干网关进行通信,后端再将指标保存到 Elasticsearch 中。
我想知道现在通过特定 gateway_id 通信的传感器。
我有一个这样的映射:
{
"mappings": {
"properties": {
"context": {
"properties": {
"gateway": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"id": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}},
"timeserver": {
"type": "date"
},
"timestamp": {
"type": "date"
},
"type": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"value": {
"type": "double"
}
}
}
}
在网关字段中,以字符串的形式保存了用于每个指标的网关 ID。
我可以通过以下查询获得每台设备的最后一次通信:
GET _search
{
"size": 0,
"aggs": {
"id_agg": {
"terms": {
"field": "context.id.keyword"
, "size": 10000
},
"aggs": {
"group_docs": {
"top_hits": {
"size": 1,
"sort": [
{
"timestamp": {
"order": "desc"
}
}
]
}
}
}
}
},
"query": {
"bool": {
"filter": [
{
"term": {
"_index": "measurements.group.*"
}
}
]
}
}
}
但如何过滤此聚合结果,使其只返回当前正在使用特定网关的传感器?例如添加类似下面的内容:"filter": {"term":{"context.gateway": {"value": "request_gateway_serial" }} },
我查阅了 bucket_selector 聚合和管道聚合,但没有找到可行的方法;在我看来它们只适用于数值,而不适用于像我的网关字段这样的字符串。
查询返回结果示例(每个设备的最近一次通信列表):
"aggregations" : {
{
"key" : "1234",
"context" : {
"gateway" : "123456",
"id" : "1234",
},{
"key" : "12345",
"context" : {
"gateway" : "1234567",
"id" : "12345",
}, {
"key" : "12345678",
"context" : {
"gateway" : "1234567",
"id" : "12345678",
}}
我的预期结果是按 "gateway": "1234567" 过滤,只得到 "key": "12345" 和 "key": "12345678"。
您可以使用 filter aggregation
GET sensors/_search
{
"size": 0,
"aggs": {
"filter_gateway": {
"filter": {
"term": {
"context.gateway.keyword": "request_gateway_serial"
}
},
"aggs": {
"id_agg": {
"terms": {
"field": "context.id.keyword",
"size": 10000
},
"aggs": {
"group_docs": {
"top_hits": {
"size": 1,
"sort": [
{
"timestamp": {
"order": "desc"
}
}
]
}
}
}
}
}
}
},
"query": {
"bool": {
"filter": [
{
"term": {
"_index": "measurements.group.*"
}
}
]
}
}
}
根据您的要求,您还可以在查询部分过滤文档,然后对其进行聚合。
编辑 1:
在下面的查询中,我分别计算了每个设备 ID 下的最大时间戳,以及仅限给定网关的文档的最大时间戳。如果两个日期相同,说明该设备的最后一次通信经由该网关,bucket_selector 就会保留这个设备 ID。
例如。
查询:
GET sensors/_search
{
"size": 0,
"aggs": {
"id_agg": {
"terms": {
"field": "context.id.keyword",
"size": 10000
},
"aggs": {
"maxDate": {
"max": {
"field": "context.timestamp"
}
},
"Filter": {
"filter": {
"term": {
"context.gateway": "1234568"
}
},
"aggs": {
"filtered_maxdate": {
"max": {
"field": "context.timestamp"
}
}
}
},
"last_geteway_filter": {
"bucket_selector": {
"buckets_path": {
"filtereddate": "Filter>filtered_maxdate",
"maxDate": "maxDate"
},
"script": "params.filtereddate==params.maxDate"
}
}
}
}
}
}
数据:
[
{
"_index" : "sensors",
"_type" : "_doc",
"_id" : "eiZ1pW0BcOVYVz455V6s",
"_score" : 1.0,
"_source" : {
"context.gateway" : "1234567",
"context.id" : 1234,
"context.timestamp" : "2019-10-02"
}
},
{
"_index" : "sensors",
"_type" : "_doc",
"_id" : "eyZ2pW0BcOVYVz45B14T",
"_score" : 1.0,
"_source" : {
"context.gateway" : "1234568",
"context.id" : 1234,
"context.timestamp" : "2019-10-03"
}
},
{
"_index" : "sensors",
"_type" : "_doc",
"_id" : "fCZ2pW0BcOVYVz45Jl6m",
"_score" : 1.0,
"_source" : {
"context.gateway" : "1234569",
"context.id" : 1234,
"context.timestamp" : "2019-10-04"
}
},
{
"_index" : "sensors",
"_type" : "_doc",
"_id" : "fSZ2pW0BcOVYVz45dV48",
"_score" : 1.0,
"_source" : {
"context.gateway" : "1234567",
"context.id" : 1235,
"context.timestamp" : "2019-10-02"
}
},
{
"_index" : "sensors",
"_type" : "_doc",
"_id" : "fiZ2pW0BcOVYVz45l17A",
"_score" : 1.0,
"_source" : {
"context.gateway" : "1234568",
"context.id" : 1235,
"context.timestamp" : "2019-10-03"
}
}
]
}
结果:
设备 1235 的最后一个文档位于网关 1234568 下:
"buckets" : [
{
"key" : "1235",
"doc_count" : 2,
"Filter" : {
"doc_count" : 1,
"filtered_maxdate" : {
"value" : 1.5700608E12,
"value_as_string" : "2019-10-03T00:00:00.000Z"
}
},
"maxDate" : {
"value" : 1.5700608E12,
"value_as_string" : "2019-10-03T00:00:00.000Z"
}
}
]