使用 Elasticsearch 重新聚合嵌套聚合结果
Reaggregate on nested aggregation results using Elasticsearch
我想对具有条件的产品计算一些聚合(使用 Elasticsearch 6.2)。所有的条件都被扁平化了,我想重复使用一些聚合结果来按特定的条件重新聚合。
这是我的索引映射:
PUT my_index
{
"mappings" : {
"_doc" : {
"properties" : {
"contract": {
"properties": {
"products": {
"type": "nested",
"properties": {
"productKey": {
"type": "keyword"
},
"criteria": {
"type": "nested",
"properties": {
"criterionKey": {
"type": "keyword"
},
"criterionValue": {
"type": "keyword"
}
}
}
}
}
}
}
}
}
}
}
我用以下数据填充了我的索引:
POST my_index/_doc
{
"contract": {
"products": [
{
"productKey": "PK_0001",
"criteria": [
{
"criterionKey": "CK_AAAA",
"criterionValue": "above_50"
},
{
"criterionKey": "CK_AAAB",
"criterionValue": "all"
}
]
}
]
}
}
POST my_index/_doc
{
"contract": {
"products": [
{
"productKey": "PK_0001",
"criteria": [
{
"criterionKey": "CK_AAAA",
"criterionValue": "below_50"
},
{
"criterionKey": "CK_AAAB",
"criterionValue": "dep"
}
]
}
]
}
}
POST my_index/_doc
{
"contract": {
"products": [
{
"productKey": "PK_0002",
"criteria": [
{
"criterionKey": "CK_AAAA",
"criterionValue": "below_50"
},
{
"criterionKey": "CK_AAAB",
"criterionValue": "dep"
}
]
}
]
}
}
我能够计算每个产品所有条件值(criterionValue)的出现次数。为此,我使用以下聚合请求:
POST my_index/_doc/_search
{
"size": 0,
"aggs": {
"agg_by_product": {
"nested": {
"path": "contract.products"
},
"aggs": {
"agg_by_product_key": {
"terms": {
"field": "contract.products.productKey"
},
"aggs": {
"agg_by_product_crit": {
"nested": {
"path": "contract.products.criteria"
},
"aggs": {
"agg_by_product_crit_key": {
"terms": {
"field": "contract.products.criteria.criterionKey",
"include": [ "CK_AAAB", "CK_AAAA" ]
},
"aggs": {
"agg_by_product_crit_value": {
"terms": {
"field": "contract.products.criteria.criterionValue"
}
}
}
}
}
}
}
}
}
}
}
}
它返回:
{
// ...
"aggregations": {
"agg_by_product": {
"doc_count": 3,
"agg_by_product_key": {
"buckets": [
{
"key": "PK_0001",
"doc_count": 2,
"agg_by_product_crit": {
"doc_count": 8,
"agg_by_product_crit_key": {
"buckets": [
{
"key": "CK_AAAB",
"doc_count": 2,
"agg_by_product_crit_value": {
"buckets": [
{
"key": "dep",
"doc_count": 1
},
{
"key": "all",
"doc_count": 1
}
]
}
},
{
"key": "CK_AAAA",
"doc_count": 2,
"agg_by_product_crit_value": {
"buckets": [
{
"key": "below_50",
"doc_count": 1
},
{
"key": "above_50",
"doc_count": 1
}
]
}
}
]
}
}
},
{
"key": "PK_0002",
"doc_count": 1,
"agg_by_product_crit": {
"doc_count": 4,
"agg_by_product_crit_key": {
"buckets": [
{
"key": "CK_AAAB",
"doc_count": 1,
"agg_by_product_crit_value": {
"buckets": [
{
"key": "dep",
"doc_count": 1
}
]
}
},
{
"key": "CK_AAAA",
"doc_count": 1,
"agg_by_product_crit_value": {
"buckets": [
{
"key": "below_50",
"doc_count": 1
}
]
}
}
]
}
}
}
]
}
}
}
}
现在我想按指定条件键的条件值进行聚合,以便得到如下内容:
{
// ...
"aggregations": {
"agg_by_product": {
"doc_count": 3,
"agg_by_product_key": {
"buckets": [
{
"key": "PK_0001",
"doc_count": 2,
"agg_by_product_crit": {
"doc_count": 8,
"agg_by_product_crit_key": {
"buckets": [
{
"key": "CK_AAAB",
"doc_count": 2,
"agg_by_product_crit_value": {
"buckets": [
{
"key": "dep",
"doc_count": 1,
"AGG_BY_SOMETHING": {
"buckets": [
{
"key": "CK_AAAA",
"doc_count": 1,
"AGG_BY_SOMETHING_2": {
"buckets": [
{
"key": "below_50",
"doc_count": 1
}
]
}
}
]
}
},
{
"key": "all",
"doc_count": 1,
"AGG_BY_SOMETHING": {
"buckets": [
{
"key": "CK_AAAA",
"doc_count": 1,
"AGG_BY_SOMETHING_2": {
"buckets": [
{
"key": "above_50",
"doc_count": 1
}
]
}
}
]
}
}
]
}
}
]
}
}
},
{
"key": "PK_0002",
"doc_count": 1,
"agg_by_product_crit": {
"doc_count": 4,
"agg_by_product_crit_key": {
"buckets": [
{
"key": "CK_AAAB",
"doc_count": 1,
"agg_by_product_crit_value": {
"buckets": [
{
"key": "dep",
"doc_count": 1,
"AGG_BY_SOMETHING": {
"buckets": [
{
"key": "CK_AAAA",
"doc_count": 1,
"AGG_BY_SOMETHING_2": {
"buckets": [
{
"key": "below_50",
"doc_count": 1
}
]
}
}
]
}
}
]
}
}
]
}
}
}
]
}
}
}
}
对应的聚合请求应该是什么?
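最后我找到了一个使用 reverse_nested 聚合的解决方案。
注意:空的 "reverse_nested": {} 会回到根文档层级,因此当同一个文档包含多个产品时,内层聚合会把其他产品的条件也统计进来;如果只想统计同一产品内的条件,可以改用 "reverse_nested": { "path": "contract.products" }。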
POST my_index/_doc/_search
{
"size": 0,
"aggs": {
"agg_by_product": {
"nested": {
"path": "contract.products"
},
"aggs": {
"agg_by_product_key": {
"terms": {
"field": "contract.products.productKey"
},
"aggs": {
"agg_by_product_crit": {
"nested": {
"path": "contract.products.criteria"
},
"aggs": {
"agg_by_product_crit_key": {
"terms": {
"field": "contract.products.criteria.criterionKey",
"include": [ "CK_AAAB" ]
},
"aggs": {
"agg_by_product_crit_value": {
"terms": {
"field": "contract.products.criteria.criterionValue"
},
"aggs": {
"agg_back_to_root": {
"reverse_nested": {},
"aggs": {
"agg_by_product_crit2": {
"nested": {
"path": "contract.products.criteria"
},
"aggs": {
"agg_by_product_crit_key2": {
"terms": {
"field": "contract.products.criteria.criterionKey",
"include": [ "CK_AAAA" ]
},
"aggs": {
"agg_by_product_crit_value2": {
"terms": {
"field": "contract.products.criteria.criterionValue"
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
我想对具有条件的产品计算一些聚合(使用 Elasticsearch 6.2)。所有的条件都被扁平化了,我想重复使用一些聚合结果来按特定的条件重新聚合。
这是我的索引映射:
PUT my_index
{
"mappings" : {
"_doc" : {
"properties" : {
"contract": {
"properties": {
"products": {
"type": "nested",
"properties": {
"productKey": {
"type": "keyword"
},
"criteria": {
"type": "nested",
"properties": {
"criterionKey": {
"type": "keyword"
},
"criterionValue": {
"type": "keyword"
}
}
}
}
}
}
}
}
}
}
}
我用以下数据填充了我的索引:
POST my_index/_doc
{
"contract": {
"products": [
{
"productKey": "PK_0001",
"criteria": [
{
"criterionKey": "CK_AAAA",
"criterionValue": "above_50"
},
{
"criterionKey": "CK_AAAB",
"criterionValue": "all"
}
]
}
]
}
}
POST my_index/_doc
{
"contract": {
"products": [
{
"productKey": "PK_0001",
"criteria": [
{
"criterionKey": "CK_AAAA",
"criterionValue": "below_50"
},
{
"criterionKey": "CK_AAAB",
"criterionValue": "dep"
}
]
}
]
}
}
POST my_index/_doc
{
"contract": {
"products": [
{
"productKey": "PK_0002",
"criteria": [
{
"criterionKey": "CK_AAAA",
"criterionValue": "below_50"
},
{
"criterionKey": "CK_AAAB",
"criterionValue": "dep"
}
]
}
]
}
}
我能够计算每个产品所有条件值(criterionValue)的出现次数。为此,我使用以下聚合请求:
POST my_index/_doc/_search
{
"size": 0,
"aggs": {
"agg_by_product": {
"nested": {
"path": "contract.products"
},
"aggs": {
"agg_by_product_key": {
"terms": {
"field": "contract.products.productKey"
},
"aggs": {
"agg_by_product_crit": {
"nested": {
"path": "contract.products.criteria"
},
"aggs": {
"agg_by_product_crit_key": {
"terms": {
"field": "contract.products.criteria.criterionKey",
"include": [ "CK_AAAB", "CK_AAAA" ]
},
"aggs": {
"agg_by_product_crit_value": {
"terms": {
"field": "contract.products.criteria.criterionValue"
}
}
}
}
}
}
}
}
}
}
}
}
它返回:
{
// ...
"aggregations": {
"agg_by_product": {
"doc_count": 3,
"agg_by_product_key": {
"buckets": [
{
"key": "PK_0001",
"doc_count": 2,
"agg_by_product_crit": {
"doc_count": 8,
"agg_by_product_crit_key": {
"buckets": [
{
"key": "CK_AAAB",
"doc_count": 2,
"agg_by_product_crit_value": {
"buckets": [
{
"key": "dep",
"doc_count": 1
},
{
"key": "all",
"doc_count": 1
}
]
}
},
{
"key": "CK_AAAA",
"doc_count": 2,
"agg_by_product_crit_value": {
"buckets": [
{
"key": "below_50",
"doc_count": 1
},
{
"key": "above_50",
"doc_count": 1
}
]
}
}
]
}
}
},
{
"key": "PK_0002",
"doc_count": 1,
"agg_by_product_crit": {
"doc_count": 4,
"agg_by_product_crit_key": {
"buckets": [
{
"key": "CK_AAAB",
"doc_count": 1,
"agg_by_product_crit_value": {
"buckets": [
{
"key": "dep",
"doc_count": 1
}
]
}
},
{
"key": "CK_AAAA",
"doc_count": 1,
"agg_by_product_crit_value": {
"buckets": [
{
"key": "below_50",
"doc_count": 1
}
]
}
}
]
}
}
}
]
}
}
}
}
现在我想按指定条件键的条件值进行聚合,以便得到如下内容:
{
// ...
"aggregations": {
"agg_by_product": {
"doc_count": 3,
"agg_by_product_key": {
"buckets": [
{
"key": "PK_0001",
"doc_count": 2,
"agg_by_product_crit": {
"doc_count": 8,
"agg_by_product_crit_key": {
"buckets": [
{
"key": "CK_AAAB",
"doc_count": 2,
"agg_by_product_crit_value": {
"buckets": [
{
"key": "dep",
"doc_count": 1,
"AGG_BY_SOMETHING": {
"buckets": [
{
"key": "CK_AAAA",
"doc_count": 1,
"AGG_BY_SOMETHING_2": {
"buckets": [
{
"key": "below_50",
"doc_count": 1
}
]
}
}
]
}
},
{
"key": "all",
"doc_count": 1,
"AGG_BY_SOMETHING": {
"buckets": [
{
"key": "CK_AAAA",
"doc_count": 1,
"AGG_BY_SOMETHING_2": {
"buckets": [
{
"key": "above_50",
"doc_count": 1
}
]
}
}
]
}
}
]
}
}
]
}
}
},
{
"key": "PK_0002",
"doc_count": 1,
"agg_by_product_crit": {
"doc_count": 4,
"agg_by_product_crit_key": {
"buckets": [
{
"key": "CK_AAAB",
"doc_count": 1,
"agg_by_product_crit_value": {
"buckets": [
{
"key": "dep",
"doc_count": 1,
"AGG_BY_SOMETHING": {
"buckets": [
{
"key": "CK_AAAA",
"doc_count": 1,
"AGG_BY_SOMETHING_2": {
"buckets": [
{
"key": "below_50",
"doc_count": 1
}
]
}
}
]
}
}
]
}
}
]
}
}
}
]
}
}
}
}
对应的聚合请求应该是什么?
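最后我找到了一个使用 reverse_nested 聚合的解决方案。
注意:空的 "reverse_nested": {} 会回到根文档层级,因此当同一个文档包含多个产品时,内层聚合会把其他产品的条件也统计进来;如果只想统计同一产品内的条件,可以改用 "reverse_nested": { "path": "contract.products" }。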
POST my_index/_doc/_search
{
"size": 0,
"aggs": {
"agg_by_product": {
"nested": {
"path": "contract.products"
},
"aggs": {
"agg_by_product_key": {
"terms": {
"field": "contract.products.productKey"
},
"aggs": {
"agg_by_product_crit": {
"nested": {
"path": "contract.products.criteria"
},
"aggs": {
"agg_by_product_crit_key": {
"terms": {
"field": "contract.products.criteria.criterionKey",
"include": [ "CK_AAAB" ]
},
"aggs": {
"agg_by_product_crit_value": {
"terms": {
"field": "contract.products.criteria.criterionValue"
},
"aggs": {
"agg_back_to_root": {
"reverse_nested": {},
"aggs": {
"agg_by_product_crit2": {
"nested": {
"path": "contract.products.criteria"
},
"aggs": {
"agg_by_product_crit_key2": {
"terms": {
"field": "contract.products.criteria.criterionKey",
"include": [ "CK_AAAA" ]
},
"aggs": {
"agg_by_product_crit_value2": {
"terms": {
"field": "contract.products.criteria.criterionValue"
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}