Elasticsearch 过滤聚合桶不存在的文档
Elasticsearch filter documents where aggregation bucket DOES NOT EXIST
我有一个查询可以返回我想要的结果,但我需要进一步过滤,以便仅显示缺少(MISSING)某个特定桶的记录。
我的查询是这样的:
{
  "size": 0,
  "query": {
    "bool": {
      "must": [
        {"match_all": {}}
      ],
      "filter": [
        {
          "bool": {
            "should": [
              {"match_phrase": {"user": "bob_user"}},
              {"match_phrase": {"user": "tom_user"}}
            ],
            "minimum_should_match": 1
          }
        },
        {
          "bool": {
            "should": [
              {"match_phrase": {"result_code": "403"}},
              {"match_phrase": {"result_code": "200"}}
            ],
            "minimum_should_match": 1
          }
        },
        {
          "range": {
            "time": {
              "gte": "2021-05-12T18:51:22.512Z",
              "lte": "2021-05-13T18:51:22.512Z",
              "format": "strict_date_optional_time"
            }
          }
        }
      ]
    }
  },
  "aggs": {
    "stats": {
      "terms": {
        "field": "host.keyword",
        "order": {"total_distinct_ip_count": "desc"},
        "size": 10000
      },
      "aggs": {
        "total_distinct_ip_count": {
          "cardinality": {"field": "ip.keyword"}
        },
        "status_codes": {
          "terms": {
            "field": "result_code.keyword",
            "order": {"distinct_ip_count_by_status_code": "desc"},
            "size": 2
          },
          "aggs": {
            "distinct_ip_count_by_status_code": {
              "cardinality": {"field": "ip.keyword"}
            }
          }
        }
      }
    }
  }
}
产生以下结果:
{
  "key": "dom.com",
  "doc_count": 92974,
  "status_codes": {
    "buckets": [
      {
        "key": "200",
        "doc_count": 92965,
        "distinct_ip_count_by_status_code": {"value": 51269}
      },
      {
        "key": "403",
        "doc_count": 9,
        "distinct_ip_count_by_status_code": {"value": 2}
      }
    ]
  },
  "total_distinct_ip_count": {"value": 51269}
},
{
  "key": "dom2.com",
  "doc_count": 1420,
  "status_codes": {
    "buckets": [
      {
        "key": "403",
        "doc_count": 1420,
        "distinct_ip_count_by_status_code": {"value": 5}
      }
    ]
  },
  "total_distinct_ip_count": {"value": 500}
},
{
  "key": "dom3.com",
  "doc_count": 171097,
  "status_codes": {
    "buckets": [
      {
        "key": "200",
        "doc_count": 127437,
        "distinct_ip_count_by_status_code": {"value": 735}
      },
      {
        "key": "403",
        "doc_count": 43660,
        "distinct_ip_count_by_status_code": {"value": 73}
      }
    ]
  },
  "total_distinct_ip_count": {"value": 808}
}
我需要一种方法,只返回(return)缺少 200 桶的记录。在这个例子中,只有(ONLY)dom2.com 这条记录符合条件,因为它有 403 桶,但没有 200 桶。我尝试过 bucket_selector,但它只能从结果中排除单个桶。我想把同时具有 200 桶和 403 桶的记录从整个结果中排除。
{
  "size": 0,
  "query": {
    "bool": {
      "must": [
        {"match_all": {}}
      ],
      "filter": [
        {
          "bool": {
            "should": [
              {"match_phrase": {"user": "bob_user"}},
              {"match_phrase": {"user": "tom_user"}}
            ],
            "minimum_should_match": 1
          }
        },
        {
          "bool": {
            "should": [
              {"match_phrase": {"result_code": "403"}},
              {"match_phrase": {"result_code": "200"}}
            ],
            "minimum_should_match": 1
          }
        },
        {
          "range": {
            "time": {
              "gte": "2021-05-12T18:51:22.512Z",
              "lte": "2021-05-13T18:51:22.512Z",
              "format": "strict_date_optional_time"
            }
          }
        }
      ]
    }
  },
  "aggs": {
    "stats": {
      "terms": {
        "field": "host.keyword",
        "order": {"total_distinct_ip_count": "desc"},
        "size": 10000
      },
      "aggs": {
        "total_distinct_ip_count": {
          "cardinality": {"field": "ip.keyword"}
        },
        "status_codes": {
          "terms": {
            "field": "result_code.keyword",
            "order": {"distinct_ip_count_by_status_code": "desc"},
            "size": 2
          },
          "aggs": {
            "distinct_ip_count_by_status_code": {
              "cardinality": {"field": "ip.keyword"}
            }
          }
        },
        "only_403": {
          "bucket_selector": {
            "buckets_path": {"var1": "status_codes['200']>_count"},
            "script": "params.var1 == null"
          }
        }
      }
    }
  }
}
我有一个查询可以返回我想要的结果,但我需要进一步过滤,以便仅显示缺少(MISSING)某个特定桶的记录。
我的查询是这样的:
{
  "size": 0,
  "query": {
    "bool": {
      "must": [
        {"match_all": {}}
      ],
      "filter": [
        {
          "bool": {
            "should": [
              {"match_phrase": {"user": "bob_user"}},
              {"match_phrase": {"user": "tom_user"}}
            ],
            "minimum_should_match": 1
          }
        },
        {
          "bool": {
            "should": [
              {"match_phrase": {"result_code": "403"}},
              {"match_phrase": {"result_code": "200"}}
            ],
            "minimum_should_match": 1
          }
        },
        {
          "range": {
            "time": {
              "gte": "2021-05-12T18:51:22.512Z",
              "lte": "2021-05-13T18:51:22.512Z",
              "format": "strict_date_optional_time"
            }
          }
        }
      ]
    }
  },
  "aggs": {
    "stats": {
      "terms": {
        "field": "host.keyword",
        "order": {"total_distinct_ip_count": "desc"},
        "size": 10000
      },
      "aggs": {
        "total_distinct_ip_count": {
          "cardinality": {"field": "ip.keyword"}
        },
        "status_codes": {
          "terms": {
            "field": "result_code.keyword",
            "order": {"distinct_ip_count_by_status_code": "desc"},
            "size": 2
          },
          "aggs": {
            "distinct_ip_count_by_status_code": {
              "cardinality": {"field": "ip.keyword"}
            }
          }
        }
      }
    }
  }
}
产生以下结果:
{
  "key": "dom.com",
  "doc_count": 92974,
  "status_codes": {
    "buckets": [
      {
        "key": "200",
        "doc_count": 92965,
        "distinct_ip_count_by_status_code": {"value": 51269}
      },
      {
        "key": "403",
        "doc_count": 9,
        "distinct_ip_count_by_status_code": {"value": 2}
      }
    ]
  },
  "total_distinct_ip_count": {"value": 51269}
},
{
  "key": "dom2.com",
  "doc_count": 1420,
  "status_codes": {
    "buckets": [
      {
        "key": "403",
        "doc_count": 1420,
        "distinct_ip_count_by_status_code": {"value": 5}
      }
    ]
  },
  "total_distinct_ip_count": {"value": 500}
},
{
  "key": "dom3.com",
  "doc_count": 171097,
  "status_codes": {
    "buckets": [
      {
        "key": "200",
        "doc_count": 127437,
        "distinct_ip_count_by_status_code": {"value": 735}
      },
      {
        "key": "403",
        "doc_count": 43660,
        "distinct_ip_count_by_status_code": {"value": 73}
      }
    ]
  },
  "total_distinct_ip_count": {"value": 808}
}
我需要一种方法,只返回(return)缺少 200 桶的记录。在这个例子中,只有(ONLY)dom2.com 这条记录符合条件,因为它有 403 桶,但没有 200 桶。我尝试过 bucket_selector,但它只能从结果中排除单个桶。我想把同时具有 200 桶和 403 桶的记录从整个结果中排除。
{
  "size": 0,
  "query": {
    "bool": {
      "must": [
        {"match_all": {}}
      ],
      "filter": [
        {
          "bool": {
            "should": [
              {"match_phrase": {"user": "bob_user"}},
              {"match_phrase": {"user": "tom_user"}}
            ],
            "minimum_should_match": 1
          }
        },
        {
          "bool": {
            "should": [
              {"match_phrase": {"result_code": "403"}},
              {"match_phrase": {"result_code": "200"}}
            ],
            "minimum_should_match": 1
          }
        },
        {
          "range": {
            "time": {
              "gte": "2021-05-12T18:51:22.512Z",
              "lte": "2021-05-13T18:51:22.512Z",
              "format": "strict_date_optional_time"
            }
          }
        }
      ]
    }
  },
  "aggs": {
    "stats": {
      "terms": {
        "field": "host.keyword",
        "order": {"total_distinct_ip_count": "desc"},
        "size": 10000
      },
      "aggs": {
        "total_distinct_ip_count": {
          "cardinality": {"field": "ip.keyword"}
        },
        "status_codes": {
          "terms": {
            "field": "result_code.keyword",
            "order": {"distinct_ip_count_by_status_code": "desc"},
            "size": 2
          },
          "aggs": {
            "distinct_ip_count_by_status_code": {
              "cardinality": {"field": "ip.keyword"}
            }
          }
        },
        "only_403": {
          "bucket_selector": {
            "buckets_path": {"var1": "status_codes['200']>_count"},
            "script": "params.var1 == null"
          }
        }
      }
    }
  }
}