使用 Elasticsearch RestHighLevelClient 按日期聚合
Aggregation date using Elasticsearch RestHighLevelClient
我正在使用 Elasticsearch RestHighLevelClient,并尝试实现与下面这条 SQL 等价的查询:
select format(date,'yyyy-MM-dd'), count(*) from order group by
format(date,'yyyy-MM-dd')
SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
sourceBuilder.query(QueryBuilders.matchAllQuery());
sourceBuilder.aggregation(AggregationBuilders.terms("date_count").field("time").format("yyyy-MM-dd"));
SearchRequest searchRequest = new SearchRequest("order_*");
searchRequest.source(sourceBuilder);
SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
Aggregations aggregations = searchResponse.getAggregations();
我得到了结果,但有很多重复日期结果,例如:
buckets:
bucket1:
key: 1583390009000
keyAsString: 2020-03-05
docCount: 4
bucket2:
key: 1583391748000
keyAsString: 2020-03-05
docCount: 5
如何避免出现重复的日期?我的意思是,相同的日期(例如 2020-03-05)应该只算作一个桶。
这是一些示例文档
{
"_index": "order_202003",
"_type": "_doc",
"_id": "1568274527051",
"_score": 1,
"_source": {
"auto": 0,
"collection_currency": "RMB",
"collection_value": 0,
"customerid": "",
"freight": 0,
"gs": 201101,
"status": "accept",
"time": "2020-03-05T10:01:23.000+0800"
}
},
{
"_index": "order_202003",
"_type": "_doc",
"_id": "1568274527078",
"_score": 1,
"_source": {
"auto": 0,
"collection_currency": "RMB",
"collection_value": 0,
"customerid": "",
"freight": 0,
"gs": 413572,
"status": "accept",
"time": "2020-03-05T17:29:53.000+0800"
}
}
您应该使用 date histogram aggregation:
// Count orders per calendar day: a date_histogram puts every document of the same day
// into a single bucket, which is the equivalent of
//   select format(date,'yyyy-MM-dd'), count(*) ... group by format(date,'yyyy-MM-dd')
SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
sourceBuilder.query(QueryBuilders.matchAllQuery());
sourceBuilder.aggregation(
        AggregationBuilders.dateHistogram("date_count")
                .field("time")
                .calendarInterval(DateHistogramInterval.DAY)
                // Day boundaries are computed in UTC unless a time zone is given. The sample
                // documents use a +0800 offset, so without this an order placed between
                // 00:00 and 08:00 local time would be counted on the previous day.
                .timeZone(java.time.ZoneId.of("+08:00"))
                // Render bucket keys as yyyy-MM-dd (keyAsString), matching the SQL output.
                .format("yyyy-MM-dd"));

// Run the search against every index matching the "order_*" pattern.
SearchRequest searchRequest = new SearchRequest("order_*");
searchRequest.source(sourceBuilder);
SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);

// The "date_count" aggregation now holds one bucket per local calendar day.
Aggregations aggregations = searchResponse.getAggregations();
我正在使用 Elasticsearch RestHighLevelClient,并尝试实现与下面这条 SQL 等价的查询:
select format(date,'yyyy-MM-dd'), count(*) from order group by format(date,'yyyy-MM-dd')
SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
sourceBuilder.query(QueryBuilders.matchAllQuery());
sourceBuilder.aggregation(AggregationBuilders.terms("date_count").field("time").format("yyyy-MM-dd"));
SearchRequest searchRequest = new SearchRequest("order_*");
searchRequest.source(sourceBuilder);
SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
Aggregations aggregations = searchResponse.getAggregations();
我得到了结果,但有很多重复日期结果,例如:
buckets:
bucket1:
key: 1583390009000
keyAsString: 2020-03-05
docCount: 4
bucket2:
key: 1583391748000
keyAsString: 2020-03-05
docCount: 5
如何避免出现重复的日期?我的意思是,相同的日期(例如 2020-03-05)应该只算作一个桶。
这是一些示例文档
{
"_index": "order_202003",
"_type": "_doc",
"_id": "1568274527051",
"_score": 1,
"_source": {
"auto": 0,
"collection_currency": "RMB",
"collection_value": 0,
"customerid": "",
"freight": 0,
"gs": 201101,
"status": "accept",
"time": "2020-03-05T10:01:23.000+0800"
}
},
{
"_index": "order_202003",
"_type": "_doc",
"_id": "1568274527078",
"_score": 1,
"_source": {
"auto": 0,
"collection_currency": "RMB",
"collection_value": 0,
"customerid": "",
"freight": 0,
"gs": 413572,
"status": "accept",
"time": "2020-03-05T17:29:53.000+0800"
}
}
您应该使用 date histogram aggregation:
// Count orders per calendar day: a date_histogram puts every document of the same day
// into a single bucket, which is the equivalent of
//   select format(date,'yyyy-MM-dd'), count(*) ... group by format(date,'yyyy-MM-dd')
SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
sourceBuilder.query(QueryBuilders.matchAllQuery());
sourceBuilder.aggregation(
        AggregationBuilders.dateHistogram("date_count")
                .field("time")
                .calendarInterval(DateHistogramInterval.DAY)
                // Day boundaries are computed in UTC unless a time zone is given. The sample
                // documents use a +0800 offset, so without this an order placed between
                // 00:00 and 08:00 local time would be counted on the previous day.
                .timeZone(java.time.ZoneId.of("+08:00"))
                // Render bucket keys as yyyy-MM-dd (keyAsString), matching the SQL output.
                .format("yyyy-MM-dd"));

// Run the search against every index matching the "order_*" pattern.
SearchRequest searchRequest = new SearchRequest("order_*");
searchRequest.source(sourceBuilder);
SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);

// The "date_count" aggregation now holds one bucket per local calendar day.
Aggregations aggregations = searchResponse.getAggregations();