获取 date_histogram 在全部时间范围内的所有存储桶结果

Fetch all time date_histogram buckets results

我使用以下查询在 Elasticsearch 7.1 中获取聚合结果。

{ 
  "query": { 
    "bool": { 
      "filter": [ 
        { 
          "bool": { 
            "must": [ 
              { 
                "match": { 
                  "viewedInFeed": true
                } 
              }
            ] 
          } 
        } 
      ] 
    } 
  },
  "size": 0, 
  "aggs": { 
    "viewed_in_feed_by_day": { 
      "date_histogram": { 
        "field": "createdDate", 
        "interval" : "day",
        "format" : "yyyy-MM-dd",
        "min_doc_count": 1
      } 
    } 
  } 
}

结果超过 10,000 条,我不确定该如何处理,因为 scroll 不适用于聚合。请参阅下面的响应。

{
    "took": 3,
    "timed_out": false,
    "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": {
            "value": 10000,
            "relation": "gte"
        },
        "max_score": null,
        "hits": []
    },
    "aggregations": {
        "viewed_in_feed_by_day": {
            "buckets": [
                {
                    "key_as_string": "2020-03-19",
                    "key": 1584576000000,
                    "doc_count": 3028
                },
                {
                    "key_as_string": "2020-03-20",
                    "key": 1584662400000,
                    "doc_count": 5384
                },
                {
                    "key_as_string": "2020-03-21",
                    "key": 1584748800000,
                    "doc_count": 3521
                }
            ]
        }
    }
}

使用 _count 时,文档数大于 10,000;即使去掉 "min_doc_count": 1,也不会返回结果,但我知道实际上还有更多数据。

基于 Jaspreet 的评论,我提出以下建议:

  • 使用 track_total_hits=true 获取准确的命中总数(自 7.0 起可用),同时保持 size=0,以便只返回聚合结果。
  • 在运行直方图之前,先使用 stats 聚合来进一步了解数据的分布情况。
GET dates/_search
{ 
  "track_total_hits": true,               
  "size": 0, 
  "aggs": { 
    "dates_insights": {
      "stats": {
        "field": "createdDate"
      }
    },
    "viewed_in_feed_by_day": { 
      "date_histogram": { 
        "field": "createdDate", 
        "interval" : "month",
        "format" : "yyyy-MM-dd",
        "min_doc_count": 1
      } 
    } 
  } 
}

得到如下结果:

...
"hits" : {
    "total" : {
      "value" : 3,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "viewed_in_feed_by_day" : {
      "buckets" : [
        {
          "key_as_string" : "2020-01-01",
          "key" : 1577836800000,
          "doc_count" : 1
        },
        {
          "key_as_string" : "2020-02-01",
          "key" : 1580515200000,
          "doc_count" : 1
        },
        {
          "key_as_string" : "2020-03-01",
          "key" : 1583020800000,
          "doc_count" : 1
        }
      ]
    },
    "dates_insights" : {
      "count" : 3,
      ...
      "min_as_string" : "2020-01-22T13:09:21.588Z",
      "max_as_string" : "2020-03-22T13:09:21.588Z",
      ...
    }
  }
...