获取 date_histogram 在所有时间范围内的存储桶结果
Fetch all time date_histogram buckets results
我在 Elasticsearch 7.1 中使用以下查询来获取聚合结果。
{
"query": {
"bool": {
"filter": [
{
"bool": {
"must": [
{
"match": {
"viewedInFeed": true
}
}
]
}
}
]
}
},
"size": 0,
"aggs": {
"viewed_in_feed_by_day": {
"date_histogram": {
"field": "createdDate",
"interval" : "day",
"format" : "yyyy-MM-dd",
"min_doc_count": 1
}
}
}
}
结果超过 10,000 条,我不确定该如何处理,因为 scroll
不适用于聚合。请参阅下面的响应。
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 10000,
"relation": "gte"
},
"max_score": null,
"hits": []
},
"aggregations": {
"viewed_in_feed_by_day": {
"buckets": [
{
"key_as_string": "2020-03-19",
"key": 1584576000000,
"doc_count": 3028
},
{
"key_as_string": "2020-03-20",
"key": 1584662400000,
"doc_count": 5384
},
{
"key_as_string": "2020-03-21",
"key": 1584748800000,
"doc_count": 3521
}
]
}
}
}
当使用 _count
时,文档数大于 10,000;但即使去掉 "min_doc_count": 1,
查询也不会返回结果,而我知道实际上还有更多数据。
基于 Jaspreet 的评论,我提出以下建议:
- 使用
track_total_hits=true
获取准确的命中总数(自 7.0 起支持),同时保持 size=0
以便只返回聚合结果。
- 使用
stats
聚合,在运行直方图之前先了解数据的整体情况。
GET dates/_search
{
"track_total_hits": true,
"size": 0,
"aggs": {
"dates_insights": {
"stats": {
"field": "createdDate"
}
},
"viewed_in_feed_by_day": {
"date_histogram": {
"field": "createdDate",
"interval" : "month",
"format" : "yyyy-MM-dd",
"min_doc_count": 1
}
}
}
}
得到如下结果:
...
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"viewed_in_feed_by_day" : {
"buckets" : [
{
"key_as_string" : "2020-01-01",
"key" : 1577836800000,
"doc_count" : 1
},
{
"key_as_string" : "2020-02-01",
"key" : 1580515200000,
"doc_count" : 1
},
{
"key_as_string" : "2020-03-01",
"key" : 1583020800000,
"doc_count" : 1
}
]
},
"dates_insights" : {
"count" : 3,
...
"min_as_string" : "2020-01-22T13:09:21.588Z",
"max_as_string" : "2020-03-22T13:09:21.588Z",
...
}
}
...
我在 Elasticsearch 7.1 中使用以下查询来获取聚合结果。
{
"query": {
"bool": {
"filter": [
{
"bool": {
"must": [
{
"match": {
"viewedInFeed": true
}
}
]
}
}
]
}
},
"size": 0,
"aggs": {
"viewed_in_feed_by_day": {
"date_histogram": {
"field": "createdDate",
"interval" : "day",
"format" : "yyyy-MM-dd",
"min_doc_count": 1
}
}
}
}
结果超过 10,000 条,我不确定该如何处理,因为 scroll
不适用于聚合。请参阅下面的响应。
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 10000,
"relation": "gte"
},
"max_score": null,
"hits": []
},
"aggregations": {
"viewed_in_feed_by_day": {
"buckets": [
{
"key_as_string": "2020-03-19",
"key": 1584576000000,
"doc_count": 3028
},
{
"key_as_string": "2020-03-20",
"key": 1584662400000,
"doc_count": 5384
},
{
"key_as_string": "2020-03-21",
"key": 1584748800000,
"doc_count": 3521
}
]
}
}
}
当使用 _count
时,文档数大于 10,000;但即使去掉 "min_doc_count": 1,
查询也不会返回结果,而我知道实际上还有更多数据。
基于 Jaspreet 的评论,我提出以下建议:
- 使用
track_total_hits=true
获取准确的命中总数(自 7.0 起支持),同时保持 size=0
以便只返回聚合结果。
- 使用
stats
聚合,在运行直方图之前先了解数据的整体情况。
GET dates/_search
{
"track_total_hits": true,
"size": 0,
"aggs": {
"dates_insights": {
"stats": {
"field": "createdDate"
}
},
"viewed_in_feed_by_day": {
"date_histogram": {
"field": "createdDate",
"interval" : "month",
"format" : "yyyy-MM-dd",
"min_doc_count": 1
}
}
}
}
得到如下结果:
...
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"viewed_in_feed_by_day" : {
"buckets" : [
{
"key_as_string" : "2020-01-01",
"key" : 1577836800000,
"doc_count" : 1
},
{
"key_as_string" : "2020-02-01",
"key" : 1580515200000,
"doc_count" : 1
},
{
"key_as_string" : "2020-03-01",
"key" : 1583020800000,
"doc_count" : 1
}
]
},
"dates_insights" : {
"count" : 3,
...
"min_as_string" : "2020-01-22T13:09:21.588Z",
"max_as_string" : "2020-03-22T13:09:21.588Z",
...
}
}
...