在 doc_count 的基础上对 ElasticSearch 中的桶进行排序
Ordering the Buckets in ElasticSearch on the basis of doc_count
我是 ElasticSearch 的新手,目前正在尝试编写一个包含聚合的 ElasticSearch 查询,用来获取按若干字段分组后的前 5 个桶(查询涉及的字段数量是动态的,可以是 2 到 5 个字段)。
我面临的问题是,团队当前使用的 ElasticSearch 版本不支持多词项(multi_terms)聚合,因此我转而使用复合(composite)聚合,并以 top_hits 作为子聚合,希望借此得到前 5 个桶。但是,我得到的结果并没有按 doc_count / hits 排序。
请求:
{
  "query": {
    "bool": {
      "must": [
        { "exists": { "field": "uuid" } },
        { "query_string": { "query": "*", "lowercase_expanded_terms": false } }
      ]
    }
  },
  "aggs": {
    "test_aggregation": {
      "composite": {
        "sources": [
          { "zipCode": { "terms": { "field": "zipCode" } } },
          { "routeCode": { "terms": { "field": "routeCode" } } }
        ]
      },
      "aggs": {
        "test_aggregation_hits": {
          "top_hits": {
            "size": 5,
            "_source": {
              "includes": ["uuid"]
            }
          }
        }
      }
    }
  },
  "size": 0
}
响应:
{
"took": 310,
"timed_out": false,
"num_reduce_phases": 2,
"_shards": {
"total": 140,
"successful": 140,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 8400,
"max_score": 0.0,
"hits": []
},
"aggregations": {
"test_aggregation": {
"after_key": {
"zipCode": "someRandomZipCode",
"routeCode": "someRandomRouteCode"
},
"buckets": [{
"key": {
"zipCode": "someRandomValue-1",
"routeCode": "someRandomRouteCode-1"
},
"doc_count": 36,
"test_aggregation_hits": {
"hits": {
"total": 36,
"max_score": 11.5650015,
"hits": [{
.... // some data
}]
}
}
}, {
"key": {
"zipCode": "someRandomValue-2",
"routeCode": "someRandomRouteCode-2"
},
"doc_count": 40,
"test_aggregation_hits": {
"hits": {
"total": 40,
"max_score": 11.5658015,
"hits": [{
.... // some data
}]
}
}
}, {
"key": {
"zipCode": "someRandomValue-3",
"routeCode": "someRandomRouteCode-3"
},
"doc_count": 13,
"test_aggregation_hits": {
"hits": {
"total": 13,
"max_score": 11.5750015,
"hits": [{
.... // some data
}]
}
}
}
....
....
]
}
}
}
- 我想知道我采用的方法是否正确?
- 如果方法不对,那我该如何解决呢?
- 如果方法是正确的,那么我哪里做错了,又该如何根据 doc_count 对桶进行排序?
我通过使用 ElasticSearch 的桶排序(bucket_sort)聚合解决了这个问题。
最终查询看起来像这样。
{
  "query": {
    "bool": {
      "must": [
        { "exists": { "field": "uuid" } },
        { "query_string": { "query": "*", "lowercase_expanded_terms": false } }
      ]
    }
  },
  "aggs": {
    "test_aggregation": {
      "composite": {
        "sources": [
          { "zipCode": { "terms": { "field": "zipCode" } } },
          { "routeCode": { "terms": { "field": "routeCode" } } }
        ],
        "size": 100
      },
      "aggs": {
        "test_aggregation_hits": {
          "top_hits": {
            "size": 25,
            "_source": {
              "includes": ["uuid"]
            }
          }
        },
        "sort_buckets": {
          "bucket_sort": {
            "sort": [
              { "_count": { "order": "desc" } }
            ],
            "size": 10
          }
        }
      }
    }
  },
  "size": 0
}
我是 ElasticSearch 的新手,目前正在尝试编写一个包含聚合的 ElasticSearch 查询,用来获取按若干字段分组后的前 5 个桶(查询涉及的字段数量是动态的,可以是 2 到 5 个字段)。
我面临的问题是,团队当前使用的 ElasticSearch 版本不支持多词项(multi_terms)聚合,因此我转而使用复合(composite)聚合,并以 top_hits 作为子聚合,希望借此得到前 5 个桶。但是,我得到的结果并没有按 doc_count / hits 排序。
请求:
{
  "query": {
    "bool": {
      "must": [
        { "exists": { "field": "uuid" } },
        { "query_string": { "query": "*", "lowercase_expanded_terms": false } }
      ]
    }
  },
  "aggs": {
    "test_aggregation": {
      "composite": {
        "sources": [
          { "zipCode": { "terms": { "field": "zipCode" } } },
          { "routeCode": { "terms": { "field": "routeCode" } } }
        ]
      },
      "aggs": {
        "test_aggregation_hits": {
          "top_hits": {
            "size": 5,
            "_source": {
              "includes": ["uuid"]
            }
          }
        }
      }
    }
  },
  "size": 0
}
响应:
{
"took": 310,
"timed_out": false,
"num_reduce_phases": 2,
"_shards": {
"total": 140,
"successful": 140,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 8400,
"max_score": 0.0,
"hits": []
},
"aggregations": {
"test_aggregation": {
"after_key": {
"zipCode": "someRandomZipCode",
"routeCode": "someRandomRouteCode"
},
"buckets": [{
"key": {
"zipCode": "someRandomValue-1",
"routeCode": "someRandomRouteCode-1"
},
"doc_count": 36,
"test_aggregation_hits": {
"hits": {
"total": 36,
"max_score": 11.5650015,
"hits": [{
.... // some data
}]
}
}
}, {
"key": {
"zipCode": "someRandomValue-2",
"routeCode": "someRandomRouteCode-2"
},
"doc_count": 40,
"test_aggregation_hits": {
"hits": {
"total": 40,
"max_score": 11.5658015,
"hits": [{
.... // some data
}]
}
}
}, {
"key": {
"zipCode": "someRandomValue-3",
"routeCode": "someRandomRouteCode-3"
},
"doc_count": 13,
"test_aggregation_hits": {
"hits": {
"total": 13,
"max_score": 11.5750015,
"hits": [{
.... // some data
}]
}
}
}
....
....
]
}
}
}
- 我想知道我采用的方法是否正确?
- 如果方法不对,那我该如何解决呢?
- 如果方法是正确的,那么我哪里做错了,又该如何根据 doc_count 对桶进行排序?
我通过使用 ElasticSearch 的桶排序(bucket_sort)聚合解决了这个问题。
最终查询看起来像这样。
{
  "query": {
    "bool": {
      "must": [
        { "exists": { "field": "uuid" } },
        { "query_string": { "query": "*", "lowercase_expanded_terms": false } }
      ]
    }
  },
  "aggs": {
    "test_aggregation": {
      "composite": {
        "sources": [
          { "zipCode": { "terms": { "field": "zipCode" } } },
          { "routeCode": { "terms": { "field": "routeCode" } } }
        ],
        "size": 100
      },
      "aggs": {
        "test_aggregation_hits": {
          "top_hits": {
            "size": 25,
            "_source": {
              "includes": ["uuid"]
            }
          }
        },
        "sort_buckets": {
          "bucket_sort": {
            "sort": [
              { "_count": { "order": "desc" } }
            ],
            "size": 10
          }
        }
      }
    }
  },
  "size": 0
}