ElasticSearch - 按分组合并(merge/combine)结果
ElasticSearch - merge/combine the results by group
ElasticSearch 中存储的数据如下:
[
{
"id": 1,
"class": "class 1",
"name": "Scott",
"scores": [
{ "year": 2022, "score": 100 },
{ "year": 2011, "score": 80 },
{ "year": 2003, "score": 70 }
]
},
{
"id": 2,
"class": "class 1",
"name": "Gabriel",
"scores": [
{ "year": 2015, "score": 90 },
{ "year": 2011, "score": 70 }
]
},
{
"id": 3,
"class": "class 2",
"name": "Scott",
"scores": [
{ "year": 2022, "score": 100 },
{ "year": 2021, "score": 100 },
{ "year": 2003, "score": 80 }
]
},
{
"id": 4,
"class": "class 2",
"name": "Pierce",
"scores": [
{ "year": 2022, "score": 70 }
]
}
]
在 ElasticSearch 中,有没有办法按特定分组把 scores 合并(merge/combine)到一个数组中?(保留重复值)
例如:
- 按 class 分组,会显示 class 1 和 class 2 的分数,只保留 class 和 scores 字段,结果将是:
[
{
"class": "class 1",
"scores": [
{ "year": 2022, "score": 100 },
{ "year": 2015, "score": 90 },
{ "year": 2011, "score": 80 },
{ "year": 2011, "score": 70 },
{ "year": 2003, "score": 70 }
]
},
{
"class": "class 2",
"scores": [
{ "year": 2022, "score": 100 },
{ "year": 2022, "score": 70 },
{ "year": 2021, "score": 100 },
{ "year": 2003, "score": 80 }
]
}
]
- 按 name 分组,会将 Scott 的所有分数放到一个数组中,只保留 name 和 scores 字段:
[
{
"name": "Scott",
"scores": [
{ "year": 2022, "score": 100 },
{ "year": 2022, "score": 100 },
{ "year": 2021, "score": 100 },
{ "year": 2011, "score": 80 },
{ "year": 2003, "score": 80 },
{ "year": 2003, "score": 70 }
]
},
{
"name": "Gabriel",
"scores": [
{ "year": 2015, "score": 90 },
{ "year": 2011, "score": 70 }
]
},
{
"name": "Pierce",
"scores": [
{ "year": 2022, "score": 70 }
]
}
]
谢谢!
免责声明:系好安全带,这个回答会比较长 ^^
TLDR;
是的,可以做到:使用 terms 聚合(terms aggregation)。
例如,按class分组:
您将看到一个名为 byClass、类型为 terms 的桶聚合。
Elastic 会为 class 字段中的每个值创建一个文档桶
-> class 1 和 class 2
但是您会注意到,这个聚合内部还嵌套了更多聚合
-> nestedAGG、byyear 和 bynotes
nestedAGG 是针对 scores 的 nested 聚合;后两个聚合(byyear、bynotes)进一步将桶先按年份(scores.year)、再按分数(scores.score)细分。
GET /71128503/_search
{
"size": 0,
"query": {
"match_all": {}
},
"aggs": {
"byClass": {
"terms": {
"field": "class",
"size": 10
},
"aggs": {
"nestedAGG": {
"nested": {
"path": "scores"
},
"aggs": {
"byyear": {
"terms": {
"field": "scores.year",
"size": 10
},
"aggs": {
"bynotes": {
"terms": {
"field": "scores.score",
"size": 10
}
}
}
}
}
}
}
}
}
}
{
...
"aggregations" : {
"byClass" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "class 1",
"doc_count" : 2,
"nestedAGG" : {
"doc_count" : 5,
"byyear" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 2011,
"doc_count" : 2,
"bynotes" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 70,
"doc_count" : 1
},
{
"key" : 80,
"doc_count" : 1
}
]
}
},
{
"key" : 2003,
"doc_count" : 1,
"bynotes" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 70,
"doc_count" : 1
}
]
}
},
{
"key" : 2015,
"doc_count" : 1,
"bynotes" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 90,
"doc_count" : 1
}
]
}
},
{
"key" : 2022,
"doc_count" : 1,
"bynotes" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 100,
"doc_count" : 1
}
]
}
}
]
}
}
},
{
"key" : "class 2",
"doc_count" : 2,
"nestedAGG" : {
"doc_count" : 4,
"byyear" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 2022,
"doc_count" : 2,
"bynotes" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 70,
"doc_count" : 1
},
{
"key" : 100,
"doc_count" : 1
}
]
}
},
{
"key" : 2003,
"doc_count" : 1,
"bynotes" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 80,
"doc_count" : 1
}
]
}
},
{
"key" : 2021,
"doc_count" : 1,
"bynotes" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 100,
"doc_count" : 1
}
]
}
}
]
}
}
}
]
}
}
}
重现步骤
首先导入数据。
注意自定义映射(scores 为 nested 类型):
PUT /71128503/
{
"settings": {},
"mappings": {
"properties": {
"class": {
"type": "keyword"
},
"name":{
"type": "keyword"
},
"scores":{
"type": "nested",
"properties": {
"score": {
"type": "integer"
},
"year": {
"type": "integer"
}
}
}
}
}
}
POST /_bulk
{"index":{"_index":"71128503","_id":1}}
{"class":"class 1","name":"Scott","scores":[{"year":2022,"score":100},{"year":2011,"score":80},{"year":2003,"score":70}]}
{"index":{"_index":"71128503","_id":2}}
{"class":"class 1","name":"Gabriel","scores":[{"year":2015,"score":90},{"year":2011,"score":70}]}
{"index":{"_index":"71128503","_id":3}}
{"class":"class 2","name":"Scott","scores":[{"year":2022,"score":100},{"year":2021,"score":100},{"year":2003,"score":80}]}
{"index":{"_index":"71128503","_id":4}}
{"class":"class 2","name":"Pierce","scores":[{"year":2022,"score":70}]}
然后查询数据:
按 class 分组 / 按 name 分组(下面以按 name 分组为例)
GET /71128503/_search
{
"size": 0,
"query": {
"match_all": {}
},
"aggs": {
"byName": { <- Name of your bucket
"terms": { <- Type of grouping, Elastic support many, like sum, avg on numeric value ....
"field": "name", <- Field you grouping on
"size": 10
},
"aggs": {
"nestedAGG": {
"nested": {
"path": "scores"
},
"aggs": {
"byyear": {
"terms": {
"field": "scores.year",
"size": 10
},
"aggs": {
"bynotes": {
"terms": {
"field": "scores.score",
"size": 10
}
}
}
}
}
}
}
}
}
}
ElasticSearch 中存储的数据如下:
[
{
"id": 1,
"class": "class 1",
"name": "Scott",
"scores": [
{ "year": 2022, "score": 100 },
{ "year": 2011, "score": 80 },
{ "year": 2003, "score": 70 }
]
},
{
"id": 2,
"class": "class 1",
"name": "Gabriel",
"scores": [
{ "year": 2015, "score": 90 },
{ "year": 2011, "score": 70 }
]
},
{
"id": 3,
"class": "class 2",
"name": "Scott",
"scores": [
{ "year": 2022, "score": 100 },
{ "year": 2021, "score": 100 },
{ "year": 2003, "score": 80 }
]
},
{
"id": 4,
"class": "class 2",
"name": "Pierce",
"scores": [
{ "year": 2022, "score": 70 }
]
}
]
在 ElasticSearch 中,有没有办法按特定分组把 scores 合并(merge/combine)到一个数组中?(保留重复值)
例如:
- 按 class 分组,会显示 class 1 和 class 2 的分数,只保留 class 和 scores 字段,结果将是:
[
{
"class": "class 1",
"scores": [
{ "year": 2022, "score": 100 },
{ "year": 2015, "score": 90 },
{ "year": 2011, "score": 80 },
{ "year": 2011, "score": 70 },
{ "year": 2003, "score": 70 }
]
},
{
"class": "class 2",
"scores": [
{ "year": 2022, "score": 100 },
{ "year": 2022, "score": 70 },
{ "year": 2021, "score": 100 },
{ "year": 2003, "score": 80 }
]
}
]
- 按 name 分组,会将 Scott 的所有分数放到一个数组中,只保留 name 和 scores 字段:
[
{
"name": "Scott",
"scores": [
{ "year": 2022, "score": 100 },
{ "year": 2022, "score": 100 },
{ "year": 2021, "score": 100 },
{ "year": 2011, "score": 80 },
{ "year": 2003, "score": 80 },
{ "year": 2003, "score": 70 }
]
},
{
"name": "Gabriel",
"scores": [
{ "year": 2015, "score": 90 },
{ "year": 2011, "score": 70 }
]
},
{
"name": "Pierce",
"scores": [
{ "year": 2022, "score": 70 }
]
}
]
谢谢!
免责声明:系好安全带,这个回答会比较长 ^^
TLDR;
是的,可以做到:使用 terms 聚合(terms aggregation)。
例如,按class分组:
您将看到一个名为 byClass、类型为 terms 的桶聚合。
Elastic 会为 class 字段中的每个值创建一个文档桶
-> class 1 和 class 2
但是您会注意到,这个聚合内部还嵌套了更多聚合
-> nestedAGG、byyear 和 bynotes
nestedAGG 是针对 scores 的 nested 聚合;后两个聚合(byyear、bynotes)进一步将桶先按年份(scores.year)、再按分数(scores.score)细分。
GET /71128503/_search
{
"size": 0,
"query": {
"match_all": {}
},
"aggs": {
"byClass": {
"terms": {
"field": "class",
"size": 10
},
"aggs": {
"nestedAGG": {
"nested": {
"path": "scores"
},
"aggs": {
"byyear": {
"terms": {
"field": "scores.year",
"size": 10
},
"aggs": {
"bynotes": {
"terms": {
"field": "scores.score",
"size": 10
}
}
}
}
}
}
}
}
}
}
{
...
"aggregations" : {
"byClass" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "class 1",
"doc_count" : 2,
"nestedAGG" : {
"doc_count" : 5,
"byyear" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 2011,
"doc_count" : 2,
"bynotes" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 70,
"doc_count" : 1
},
{
"key" : 80,
"doc_count" : 1
}
]
}
},
{
"key" : 2003,
"doc_count" : 1,
"bynotes" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 70,
"doc_count" : 1
}
]
}
},
{
"key" : 2015,
"doc_count" : 1,
"bynotes" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 90,
"doc_count" : 1
}
]
}
},
{
"key" : 2022,
"doc_count" : 1,
"bynotes" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 100,
"doc_count" : 1
}
]
}
}
]
}
}
},
{
"key" : "class 2",
"doc_count" : 2,
"nestedAGG" : {
"doc_count" : 4,
"byyear" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 2022,
"doc_count" : 2,
"bynotes" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 70,
"doc_count" : 1
},
{
"key" : 100,
"doc_count" : 1
}
]
}
},
{
"key" : 2003,
"doc_count" : 1,
"bynotes" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 80,
"doc_count" : 1
}
]
}
},
{
"key" : 2021,
"doc_count" : 1,
"bynotes" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 100,
"doc_count" : 1
}
]
}
}
]
}
}
}
]
}
}
}
重现步骤
首先导入数据。注意自定义映射(scores 为 nested 类型):
PUT /71128503/
{
"settings": {},
"mappings": {
"properties": {
"class": {
"type": "keyword"
},
"name":{
"type": "keyword"
},
"scores":{
"type": "nested",
"properties": {
"score": {
"type": "integer"
},
"year": {
"type": "integer"
}
}
}
}
}
}
POST /_bulk
{"index":{"_index":"71128503","_id":1}}
{"class":"class 1","name":"Scott","scores":[{"year":2022,"score":100},{"year":2011,"score":80},{"year":2003,"score":70}]}
{"index":{"_index":"71128503","_id":2}}
{"class":"class 1","name":"Gabriel","scores":[{"year":2015,"score":90},{"year":2011,"score":70}]}
{"index":{"_index":"71128503","_id":3}}
{"class":"class 2","name":"Scott","scores":[{"year":2022,"score":100},{"year":2021,"score":100},{"year":2003,"score":80}]}
{"index":{"_index":"71128503","_id":4}}
{"class":"class 2","name":"Pierce","scores":[{"year":2022,"score":70}]}
然后查询数据:
按 class 分组 / 按 name 分组(下面以按 name 分组为例)
GET /71128503/_search
{
"size": 0,
"query": {
"match_all": {}
},
"aggs": {
"byName": { <- Name of your bucket
"terms": { <- Type of grouping, Elastic support many, like sum, avg on numeric value ....
"field": "name", <- Field you grouping on
"size": 10
},
"aggs": {
"nestedAGG": {
"nested": {
"path": "scores"
},
"aggs": {
"byyear": {
"terms": {
"field": "scores.year",
"size": 10
},
"aggs": {
"bynotes": {
"terms": {
"field": "scores.score",
"size": 10
}
}
}
}
}
}
}
}
}
}