Elasticsearch 过滤聚合桶不存在的文档

Elasticsearch filter documents where aggregation bucket DOES NOT EXIST

我有一个查询可以返回我想要的结果,但我需要进一步过滤,以便只显示缺少某个特定桶(bucket)的记录。

我的查询是这样的:

{
  "size": 0,
  "query": {
    "bool": {
      "must": [
        { "match_all": {} }
      ],
      "filter": [
        {
          "bool": {
            "should": [
              { "match_phrase": { "user": "bob_user" } },
              { "match_phrase": { "user": "tom_user" } }
            ],
            "minimum_should_match": 1
          }
        },
        {
          "bool": {
            "should": [
              { "match_phrase": { "result_code": "403" } },
              { "match_phrase": { "result_code": "200" } }
            ],
            "minimum_should_match": 1
          }
        },
        {
          "range": {
            "time": {
              "gte": "2021-05-12T18:51:22.512Z",
              "lte": "2021-05-13T18:51:22.512Z",
              "format": "strict_date_optional_time"
            }
          }
        }
      ]
    }
  },
  "aggs": {
    "stats": {
      "terms": {
        "field": "host.keyword",
        "order": { "total_distinct_ip_count": "desc" },
        "size": 10000
      },
      "aggs": {
        "total_distinct_ip_count": {
          "cardinality": { "field": "ip.keyword" }
        },
        "status_codes": {
          "terms": {
            "field": "result_code.keyword",
            "order": { "distinct_ip_count_by_status_code": "desc" },
            "size": 2
          },
          "aggs": {
            "distinct_ip_count_by_status_code": {
              "cardinality": { "field": "ip.keyword" }
            }
          }
        }
      }
    }
  }
}

产生以下结果:

{
  "key" : "dom.com",
  "doc_count" : 92974,
  "status_codes" : {
    "buckets" : [
      {
        "key" : "200",
        "doc_count" : 92965,
        "distinct_ip_count_by_status_code" : {"value" : 51269}
      },
      {
        "key" : "403",
        "doc_count" : 9,
        "distinct_ip_count_by_status_code" : {"value" : 2}
      }
    ]
  },
  "total_distinct_ip_count" : {"value" : 51269}
},
{
  "key" : "dom2.com",
  "doc_count" : 1420,
  "status_codes" : {
    "buckets" : [
      {
        "key" : "403",
        "doc_count" : 1420,
        "distinct_ip_count_by_status_code" : {"value" : 5}
      }
    ]
  },
  "total_distinct_ip_count" : {"value" : 500}
},
{
  "key" : "dom3.com",
  "doc_count" : 171097,
  "status_codes" : {
    "buckets" : [
      {
        "key" : "200",
        "doc_count" : 127437,
        "distinct_ip_count_by_status_code" : {"value" : 735}
      },
      {
        "key" : "403",
        "doc_count" : 43660,
        "distinct_ip_count_by_status_code" : {"value" : 73}
      }
    ]
  },
  "total_distinct_ip_count" : {"value" : 808}
}

我需要一种方法,只返回缺少 200 桶的记录。在这个例子中,只应返回 dom2.com 这一条记录,因为它有 403 桶,但没有 200 桶。我尝试过 bucket_selector,但它似乎只能从结果中排除单个桶。我想要的是把同时具有 200 桶和 403 桶的记录从整个结果中排除。下面是我的尝试:

{
"size": 0,
"query": 
{
    "bool": 
    {
        "must": [{"match_all": {}}],
        "filter": 
        [
            {
                "bool": 
                {
                    "should": 
                    [
                        {"match_phrase": {"user": "bob_user"}},
                        {"match_phrase": {"user": "tom_user"}}
                    ],"minimum_should_match": 1
                }
            },
            {
                "bool": 
                {
                    "should": 
                    [
                        {"match_phrase": {"result_code": "403"}},
                        {"match_phrase": {"result_code": "200"}}
                    ],"minimum_should_match": 1
                }
            },
            {
                "range": {"time": {"gte": "2021-05-12T18:51:22.512Z","lte": "2021-05-13T18:51:22.512Z","format": "strict_date_optional_time"}}}
        ]
    }
},
"aggs": 
{
    "stats": 
    {
        "terms": {"field": "host.keyword","order": {"total_distinct_ip_count": "desc"},"size": 10000},
        "aggs": 
        {
            "total_distinct_ip_count": {"cardinality": {"field": "ip.keyword"}},
            "status_codes": 
            {
                "terms": {"field": "result_code.keyword","order": {"distinct_ip_count_by_status_code": "desc"},"size": 2},
                "aggs": 
                {
                    "distinct_ip_count_by_status_code": {"cardinality": {"field": "ip.keyword"}}
                }
            },
            "count_200":
            {
                "filter": {"term": {"result_code.keyword": "200"}}
            },
            "only_403":
            {
                "bucket_selector":
                {
                    "buckets_path":
                    {"var1": "count_200>_count"},
                    "gap_policy": "insert_zeros",
                    "script": "params.var1 == 0"
                }
            }
        }
    }
}