Elasticsearch 从复合聚合中排除键

Elasticsearch exclude key from composite aggregation

我需要在复合聚合中排除某些键。下面是我索引中的一个文档示例:

{
    "end_date": 1230314400000,
    "parameter_codes": [28, 35, 30],
    "platform_code": "41012",
    "start_date": 1230314400000,
    "station_id": 7833246
}

我执行了一个搜索请求,它可以:按每个 platform_code/parameter_codes 组合返回结果,同时返回每个组合对应的 station_id,并对存储桶进行分页。

请求如下:

{
    "size": 0,
    "query": {
        "match_all": {
            "boost": 1.0
        }
    },
    "_source": false,
    "aggregations": {
        "compositeAgg": {
            "composite": {
                "size": 10,
                "sources": [{
                        "platform_code": {
                            "terms": {
                                "field": "platform_code",
                                "missing_bucket": false,
                                "order": "asc"
                            }
                        }
                    }, {
                        "parameter_codes": {
                            "terms": {
                                "field": "parameter_codes",
                                "missing_bucket": false,
                                "order": "asc"
                            }
                        }
                    }]
            },
            "aggregations": {
                "aggstation_id": {
                    "terms": {
                        "field": "station_id",
                        "size": 2147483647,
                        "min_doc_count": 1,
                        "shard_min_doc_count": 0,
                        "show_term_doc_count_error": false,
                        "order": {
                            "_key": "asc"
                        }
                    }
                },
                "pipe": {
                    "bucket_sort": {
                        "sort": [{
                                "_key": {
                                    "order": "asc"
                                }
                            }],
                        "from": 0,
                        "size": 10,
                        "gap_policy": "SKIP"
                    }
                }
            }
        }
    }
}

这个请求给我以下结果:

{
    "took": 3,
    "timed_out": false,
    "_shards": {
        "total": 8,
        "successful": 8,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 3,
            "relation": "eq"
        },
        "max_score": null,
        "hits": []
    },
    "aggregations": {
        "composite#compositeAgg": {
            "after_key": {
                "platform_code": "41012",
                "parameter_codes": 60
            },
            "buckets": [{
                    "key": {
                        "platform_code": "41012",
                        "parameter_codes": 28
                    },
                    "doc_count": 1,
                    "lterms#aggstation_id": {
                        "doc_count_error_upper_bound": 0,
                        "sum_other_doc_count": 0,
                        "buckets": [{
                                "key": 7833246,
                                "doc_count": 1
                            }]
                    }
                }, {
                    "key": {
                        "platform_code": "41012",
                        "parameter_codes": 30
                    },
                    "doc_count": 2,
                    "lterms#aggstation_id": {
                        "doc_count_error_upper_bound": 0,
                        "sum_other_doc_count": 0,
                        "buckets": [{
                                "key": 7833246,
                                "doc_count": 1
                            }, {
                                "key": 12787501,
                                "doc_count": 1
                            }]
                    }
                }, {
                    "key": {
                        "platform_code": "41012",
                        "parameter_codes": 35
                    },
                    "doc_count": 2,
                    "lterms#aggstation_id": {
                        "doc_count_error_upper_bound": 0,
                        "sum_other_doc_count": 0,
                        "buckets": [{
                                "key": 7833246,
                                "doc_count": 1
                            }, {
                                "key": 12787501,
                                "doc_count": 1
                            }]
                    }
                }]
        }
    }
}

这运行得很好,但我需要排除一个或多个 parameter_codes 值。例如,排除“35”之后,我只想得到以下键:

{
   "platform_code": "41012",
   "parameter_codes": 28
}

{
   "platform_code": "41012",
   "parameter_codes": 30
}

我尝试了很多方法,但都没有成功。有谁知道我该怎么做吗?

您可以尝试在查询中排除 parameter_codes 为 35 的文档。注意:由于 parameter_codes 是数组字段,这样会排除所有包含 35 的整篇文档(连同其中的 28、30 等其他值),而不仅仅是键 35。

{
      "query": {
        "bool": {
          "must_not": [
            {
              "term": {
                "parameter_codes": {
                  "value": "35"
                }
              }
            }
          ]
        }
      }
    }

可以在复合聚合的 sources 中使用脚本,只返回数组中需要保留的值。

{
  "size": 0,
  "query": {
    "match_all": {
      "boost": 1
    }
  },
  "_source": false,
  "aggregations": {
    "compositeAgg": {
      "composite": {
        "size": 10,
        "sources": [
          {
            "platform_code": {
              "terms": {
                "field": "platform_code.keyword",
                "missing_bucket": false,
                "order": "asc"
              }
            }
          },
          {
            "parameter_codes": {
              "terms": {
                "script": {
                  "source": """
                   def arr=[];
                   for (item in doc['parameter_codes']) {
                       if(item !=35)
                       {
                          arr.add(item);
                       }
                    }
                  return arr"""
                }
              }
            }
          }
        ]
      },
      "aggregations": {
        "aggstation_id": {
          "terms": {
            "field": "station_id",
            "size": 2147483647,
            "min_doc_count": 1,
            "shard_min_doc_count": 0,
            "show_term_doc_count_error": false,
            "order": {
              "_key": "asc"
            }
          }
        },
        "pipe": {
          "bucket_sort": {
            "sort": [
              {
                "_key": {
                  "order": "asc"
                }
              }
            ],
            "from": 0,
            "size": 10,
            "gap_policy": "SKIP"
          }
        }
      }
    }
  }
}