Elasticsearch 取平均值
Elasticsearch get average
我正在尝试对 elasticsearch 上的聚合数据进行平均。这是我的数据结构:
文档 1
{
"groupId":"TEST_01",
"lag":10,
"detectionDate":"2021-02-26T21:42:30.010Z",
"tipo":"uno",
"topics":[
{
"name":"topic_01",
"valore":2
},
{
"name":"topic_02",
"valore":4
}
]
}
文档 2
{
"groupId":"TEST_01",
"lag":10,
"detectionDate":"2021-02-26T21:42:30.010Z",
"tipo":"uno",
"topics":[
{
"name":"topic_01",
"valore":4
},
{
"name":"topic_02",
"valore":8
}
]
}
我必须通过 groupId 和主题名称创建一个聚合,并在此聚合上计算值字段的平均值。但是用源码试了一下求平均值的结果是错误的
根据文件一和文件二的上述数据,预期结果应该是:
groupId
topicName
average
TEST_01
topic_01
3
TEST_01
topic_02
6
TermsAggregationBuilder aggregation = AggregationBuilders
.terms("groupId")
.field("groupId.keyword")
.subAggregation(AggregationBuilders
.terms("topicName")
.field("topics.name.keyword").subAggregation(AggregationBuilders
.avg("avg").field("topics.valore")));
首先确保您的主题字段类型为“嵌套”,因为如果它是“对象”,则 topicName 和 valores 将被展平。这意味着您最终将得到一组价值和主题名称,它们之间没有关系。
映射
{
"test_ynsanity" : {
"mappings" : {
"properties" : {
"detectionDate" : {
"type" : "date"
},
"groupId" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"lag" : {
"type" : "long"
},
"tipo" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"topics" : {
"type" : "nested",
"properties" : {
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"valore" : {
"type" : "long"
}
}
}
}
}
}
}
正在摄取数据
POST test_ynsanity/_doc
{
"groupId":"TEST_01",
"lag":10,
"detectionDate":"2021-02-26T21:42:30.010Z",
"tipo":"uno",
"topics":[
{
"name":"topic_01",
"valore":2
},
{
"name":"topic_02",
"valore":4
}
]
}
POST test_ynsanity/_doc
{
"groupId":"TEST_01",
"lag":10,
"detectionDate":"2021-02-26T21:42:30.010Z",
"tipo":"uno",
"topics":[
{
"name":"topic_01",
"valore":4
},
{
"name":"topic_02",
"valore":8
}
]
}
查询
POST test_ynsanity/_search
{
"size": 0,
"aggs": {
"groups": {
"terms": {
"field": "groupId.keyword",
"size": 10
},
"aggs": {
"topics": {
"nested": {
"path": "topics"
},
"aggs": {
"topic_names": {
"terms": {
"field": "topics.name.keyword"
},
"aggs": {
"topic_avg": {
"avg": {
"field": "topics.valore"
}
}
}
}
}
}
}
}
}
}
回应
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"groups" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "TEST_01",
"doc_count" : 2,
"topics" : {
"doc_count" : 4,
"topic_names" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "topic_01",
"doc_count" : 2,
"NAME" : {
"value" : 3.0
}
},
{
"key" : "topic_02",
"doc_count" : 2,
"NAME" : {
"value" : 6.0
}
}
]
}
}
}
]
}
}
}
我现在无法访问 Java DSL,但查询应该如下所示:
TermsAggregationBuilder aggregation = AggregationBuilders
.terms("groupId")
.field("groupId.keyword")
.subAggregation(AggregationBuilders
.nested("agg", "topics")
.terms("topic_names")
.field("topics.name.keyword").subAggregation(AggregationBuilders
.avg("avg").field("topics.valore")));
我正在尝试对 elasticsearch 上的聚合数据进行平均。这是我的数据结构:
文档 1
{
"groupId":"TEST_01",
"lag":10,
"detectionDate":"2021-02-26T21:42:30.010Z",
"tipo":"uno",
"topics":[
{
"name":"topic_01",
"valore":2
},
{
"name":"topic_02",
"valore":4
}
]
}
文档 2
{
"groupId":"TEST_01",
"lag":10,
"detectionDate":"2021-02-26T21:42:30.010Z",
"tipo":"uno",
"topics":[
{
"name":"topic_01",
"valore":4
},
{
"name":"topic_02",
"valore":8
}
]
}
我必须通过 groupId 和主题名称创建一个聚合,并在此聚合上计算值字段的平均值。但是用源码试了一下求平均值的结果是错误的
根据文件一和文件二的上述数据,预期结果应该是:
groupId | topicName | average |
---|---|---|
TEST_01 | topic_01 | 3 |
TEST_01 | topic_02 | 6 |
TermsAggregationBuilder aggregation = AggregationBuilders
.terms("groupId")
.field("groupId.keyword")
.subAggregation(AggregationBuilders
.terms("topicName")
.field("topics.name.keyword").subAggregation(AggregationBuilders
.avg("avg").field("topics.valore")));
首先确保您的主题字段类型为“嵌套”,因为如果它是“对象”,则 topicName 和 valores 将被展平。这意味着您最终将得到一组价值和主题名称,它们之间没有关系。
映射
{
"test_ynsanity" : {
"mappings" : {
"properties" : {
"detectionDate" : {
"type" : "date"
},
"groupId" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"lag" : {
"type" : "long"
},
"tipo" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"topics" : {
"type" : "nested",
"properties" : {
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"valore" : {
"type" : "long"
}
}
}
}
}
}
}
正在摄取数据
POST test_ynsanity/_doc
{
"groupId":"TEST_01",
"lag":10,
"detectionDate":"2021-02-26T21:42:30.010Z",
"tipo":"uno",
"topics":[
{
"name":"topic_01",
"valore":2
},
{
"name":"topic_02",
"valore":4
}
]
}
POST test_ynsanity/_doc
{
"groupId":"TEST_01",
"lag":10,
"detectionDate":"2021-02-26T21:42:30.010Z",
"tipo":"uno",
"topics":[
{
"name":"topic_01",
"valore":4
},
{
"name":"topic_02",
"valore":8
}
]
}
查询
POST test_ynsanity/_search
{
"size": 0,
"aggs": {
"groups": {
"terms": {
"field": "groupId.keyword",
"size": 10
},
"aggs": {
"topics": {
"nested": {
"path": "topics"
},
"aggs": {
"topic_names": {
"terms": {
"field": "topics.name.keyword"
},
"aggs": {
"topic_avg": {
"avg": {
"field": "topics.valore"
}
}
}
}
}
}
}
}
}
}
回应
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"groups" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "TEST_01",
"doc_count" : 2,
"topics" : {
"doc_count" : 4,
"topic_names" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "topic_01",
"doc_count" : 2,
"NAME" : {
"value" : 3.0
}
},
{
"key" : "topic_02",
"doc_count" : 2,
"NAME" : {
"value" : 6.0
}
}
]
}
}
}
]
}
}
}
我现在无法访问 Java DSL,但查询应该如下所示:
TermsAggregationBuilder aggregation = AggregationBuilders
.terms("groupId")
.field("groupId.keyword")
.subAggregation(AggregationBuilders
.nested("agg", "topics")
.terms("topic_names")
.field("topics.name.keyword").subAggregation(AggregationBuilders
.avg("avg").field("topics.valore")));