Elasticsearch 中对过滤后数据的统计
Statistics on filtered data in Elasticsearch
我尝试对过滤后的数据应用统计,但它不起作用...
我的场景是:
我想对一些过滤后的数据进行统计。所以我应用了以下查询:
{
"from": 0,
"size": 10000,
"facets": {
"stats": {
"statistical": {
"field": "xxxx"
},"facet_filter": {
"and": [
{
"term": {
"id_1": "33"
}
}
],
"or": [
{
"term": {
"type": "aaaa"
}
},
{
"term": {
"type": "bbbb"
}
}
]
}
}
}
}
它工作正常。但是当我对多个字段应用统计时,它就不起作用了。以下是查询:
{
"from": 0,
"size": 10000,
"facets": {
"stats": {
"statistical": {
"field": "xxxx"
},"facet_filter": {
"and": [
{
"term": {
"id_1": "33"
}
}
],
"or": [
{
"term": {
"type": "aaaa"
}
},
{
"term": {
"type": "bbbb"
}
}
]
}
},
"stats1": {
"statistical": {
"field": "yyyy"
},"facet_filter": {
"and": [
{
"term": {
"id_1": "33"
}
}
],
"or": [
{
"term": {
"type": "aaaa"
}
},
{
"term": {
"type": "bbbb"
}
}
]
}
}
}
}
它抛出以下错误:
Parse Failure [No parser for element [stats1]]]; }]",
"status": 400
请分享您的想法。提前致谢。
您设置分面过滤器(facet_filter)的方式语法不正确,逻辑上也说不通,因为您在同一层级同时放了一个 "and" 和一个 "or"。这就是出错的原因。我猜您的本意是把 "or" 嵌套在 "and" 里面,像这样:
"facet_filter": {
"and": [
{
"term": {
"id_1": "33"
}
},
{
"or": [
{
"term": {
"type": "aaaa"
}
},
{
"term": {
"type": "bbbb"
}
}
]
}
]
}
此外,由于这两个分面使用的过滤器完全相同,您也可以把它放进 filtered 查询中,这样只需指定一次。按照这种做法,我构造了下面这个简单示例,我想它应该就是您想要的效果:
DELETE /test_index
PUT /test_index
{
"settings": {
"number_of_shards": 1
}
}
POST /test_index/_bulk
{"index":{"_index":"test_index","_type":"doc","_id":1}}
{"id_1":"32", "xxxx":2, "yyyy":5,"type":"aaaa"}
{"index":{"_index":"test_index","_type":"doc","_id":2}}
{"id_1":"32", "xxxx":3, "yyyy":10,"type":"bbbb"}
{"index":{"_index":"test_index","_type":"doc","_id":3}}
{"id_1":"33", "xxxx":4, "yyyy":15,"type":"aaaa"}
{"index":{"_index":"test_index","_type":"doc","_id":4}}
{"id_1":"33", "xxxx":5, "yyyy":20,"type":"bbbb"}
POST /test_index/_search
{
"size": 0,
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"and": [
{
"term": {
"id_1": "33"
}
},
{
"or": [
{
"term": {
"type": "aaaa"
}
},
{
"term": {
"type": "bbbb"
}
}
]
}
]
}
}
},
"facets": {
"stats": {
"statistical": {
"field": "xxxx"
}
},
"stats1": {
"statistical": {
"field": "yyyy"
}
}
}
}
...
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0,
"hits": []
},
"facets": {
"stats": {
"_type": "statistical",
"count": 2,
"total": 9,
"min": 4,
"max": 5,
"mean": 4.5,
"sum_of_squares": 41,
"variance": 0.25,
"std_deviation": 0.5
},
"stats1": {
"_type": "statistical",
"count": 2,
"total": 35,
"min": 15,
"max": 20,
"mean": 17.5,
"sum_of_squares": 625,
"variance": 6.25,
"std_deviation": 2.5
}
}
}
话虽如此,使用聚合(aggregations)可能会更好,尤其是分面(facets)现已被弃用。使用 stats aggregation 的同一示例如下所示:
POST /test_index/_search
{
"size": 0,
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"and": [
{
"term": {
"id_1": "33"
}
},
{
"or": [
{
"term": {
"type": "aaaa"
}
},
{
"term": {
"type": "bbbb"
}
}
]
}
]
}
}
},
"aggs": {
"stats": {
"stats": {
"field": "xxxx"
}
},
"stats1": {
"stats": {
"field": "yyyy"
}
}
}
}
...
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0,
"hits": []
},
"aggregations": {
"stats": {
"count": 2,
"min": 4,
"max": 5,
"avg": 4.5,
"sum": 9
},
"stats1": {
"count": 2,
"min": 15,
"max": 20,
"avg": 17.5,
"sum": 35
}
}
}
或者如果需要,您可以使用 extended stats aggregation:
POST /test_index/_search
{
"size": 0,
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"and": [
{
"term": {
"id_1": "33"
}
},
{
"or": [
{
"term": {
"type": "aaaa"
}
},
{
"term": {
"type": "bbbb"
}
}
]
}
]
}
}
},
"aggs": {
"stats": {
"extended_stats": {
"field": "xxxx"
}
},
"stats1": {
"extended_stats": {
"field": "yyyy"
}
}
}
}
...
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0,
"hits": []
},
"aggregations": {
"stats": {
"count": 2,
"min": 4,
"max": 5,
"avg": 4.5,
"sum": 9,
"sum_of_squares": 41,
"variance": 0.25,
"std_deviation": 0.5
},
"stats1": {
"count": 2,
"min": 15,
"max": 20,
"avg": 17.5,
"sum": 35,
"sum_of_squares": 625,
"variance": 6.25,
"std_deviation": 2.5
}
}
}
我尝试对过滤后的数据应用统计,但它不起作用...
我的场景是:
我想对一些过滤后的数据进行统计。所以我应用了以下查询:
{
"from": 0,
"size": 10000,
"facets": {
"stats": {
"statistical": {
"field": "xxxx"
},"facet_filter": {
"and": [
{
"term": {
"id_1": "33"
}
}
],
"or": [
{
"term": {
"type": "aaaa"
}
},
{
"term": {
"type": "bbbb"
}
}
]
}
}
}
}
它工作正常。但是当我对多个字段应用统计时,它就不起作用了。以下是查询:
{
"from": 0,
"size": 10000,
"facets": {
"stats": {
"statistical": {
"field": "xxxx"
},"facet_filter": {
"and": [
{
"term": {
"id_1": "33"
}
}
],
"or": [
{
"term": {
"type": "aaaa"
}
},
{
"term": {
"type": "bbbb"
}
}
]
}
},
"stats1": {
"statistical": {
"field": "yyyy"
},"facet_filter": {
"and": [
{
"term": {
"id_1": "33"
}
}
],
"or": [
{
"term": {
"type": "aaaa"
}
},
{
"term": {
"type": "bbbb"
}
}
]
}
}
}
}
它抛出以下错误:
Parse Failure [No parser for element [stats1]]]; }]",
"status": 400
请分享您的想法。提前致谢。
您设置分面过滤器(facet_filter)的方式语法不正确,逻辑上也说不通,因为您在同一层级同时放了一个 "and" 和一个 "or"。这就是出错的原因。我猜您的本意是把 "or" 嵌套在 "and" 里面,像这样:
"facet_filter": {
"and": [
{
"term": {
"id_1": "33"
}
},
{
"or": [
{
"term": {
"type": "aaaa"
}
},
{
"term": {
"type": "bbbb"
}
}
]
}
]
}
此外,由于这两个分面使用的过滤器完全相同,您也可以把它放进 filtered 查询中,这样只需指定一次。按照这种做法,我构造了下面这个简单示例,我想它应该就是您想要的效果:
DELETE /test_index
PUT /test_index
{
"settings": {
"number_of_shards": 1
}
}
POST /test_index/_bulk
{"index":{"_index":"test_index","_type":"doc","_id":1}}
{"id_1":"32", "xxxx":2, "yyyy":5,"type":"aaaa"}
{"index":{"_index":"test_index","_type":"doc","_id":2}}
{"id_1":"32", "xxxx":3, "yyyy":10,"type":"bbbb"}
{"index":{"_index":"test_index","_type":"doc","_id":3}}
{"id_1":"33", "xxxx":4, "yyyy":15,"type":"aaaa"}
{"index":{"_index":"test_index","_type":"doc","_id":4}}
{"id_1":"33", "xxxx":5, "yyyy":20,"type":"bbbb"}
POST /test_index/_search
{
"size": 0,
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"and": [
{
"term": {
"id_1": "33"
}
},
{
"or": [
{
"term": {
"type": "aaaa"
}
},
{
"term": {
"type": "bbbb"
}
}
]
}
]
}
}
},
"facets": {
"stats": {
"statistical": {
"field": "xxxx"
}
},
"stats1": {
"statistical": {
"field": "yyyy"
}
}
}
}
...
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0,
"hits": []
},
"facets": {
"stats": {
"_type": "statistical",
"count": 2,
"total": 9,
"min": 4,
"max": 5,
"mean": 4.5,
"sum_of_squares": 41,
"variance": 0.25,
"std_deviation": 0.5
},
"stats1": {
"_type": "statistical",
"count": 2,
"total": 35,
"min": 15,
"max": 20,
"mean": 17.5,
"sum_of_squares": 625,
"variance": 6.25,
"std_deviation": 2.5
}
}
}
话虽如此,使用聚合(aggregations)可能会更好,尤其是分面(facets)现已被弃用。使用 stats aggregation 的同一示例如下所示:
POST /test_index/_search
{
"size": 0,
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"and": [
{
"term": {
"id_1": "33"
}
},
{
"or": [
{
"term": {
"type": "aaaa"
}
},
{
"term": {
"type": "bbbb"
}
}
]
}
]
}
}
},
"aggs": {
"stats": {
"stats": {
"field": "xxxx"
}
},
"stats1": {
"stats": {
"field": "yyyy"
}
}
}
}
...
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0,
"hits": []
},
"aggregations": {
"stats": {
"count": 2,
"min": 4,
"max": 5,
"avg": 4.5,
"sum": 9
},
"stats1": {
"count": 2,
"min": 15,
"max": 20,
"avg": 17.5,
"sum": 35
}
}
}
或者如果需要,您可以使用 extended stats aggregation:
POST /test_index/_search
{
"size": 0,
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"and": [
{
"term": {
"id_1": "33"
}
},
{
"or": [
{
"term": {
"type": "aaaa"
}
},
{
"term": {
"type": "bbbb"
}
}
]
}
]
}
}
},
"aggs": {
"stats": {
"extended_stats": {
"field": "xxxx"
}
},
"stats1": {
"extended_stats": {
"field": "yyyy"
}
}
}
}
...
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0,
"hits": []
},
"aggregations": {
"stats": {
"count": 2,
"min": 4,
"max": 5,
"avg": 4.5,
"sum": 9,
"sum_of_squares": 41,
"variance": 0.25,
"std_deviation": 0.5
},
"stats1": {
"count": 2,
"min": 15,
"max": 20,
"avg": 17.5,
"sum": 35,
"sum_of_squares": 625,
"variance": 6.25,
"std_deviation": 2.5
}
}
}