具有嵌套聚合的 Elasticsearch NEST 客户端

Elasticsearch NEST client with nested aggregations

我正在使用 NEST 2.3.2,并尝试使用嵌套聚合构建查询。基本上,我有一个存放日志的索引,每条日志都有一个时间戳和一个结果代码。我想先把这些日志按分钟分桶,再根据结果代码进一步分类。

我有以下用于生成查询的 F# 代码。

/// Builds the terms aggregation named "result_aggregation" that groups
/// documents by their "Result" field. The maximum number of buckets is taken
/// from `bucketSize`, which is defined outside this function.
let generateAggregationByResultCode () =
    let executionHint =
        Nullable<TermsAggregationExecutionHint>(TermsAggregationExecutionHint.GlobalOrdinals)
    TermsAggregation("result_aggregation",
                     Field = Field(Name = "Result"),
                     ExecutionHint = executionHint,
                     // A minimum count of 0 is requested so empty buckets are not filtered out.
                     MinimumDocumentCount = Nullable<int>(0),
                     Size = Nullable<int>(bucketSize),
                     // Value substituted for documents that have no "Result" field.
                     Missing = "-128")

/// Builds the "by_minute" date histogram over "OperationTime" and attaches the
/// result-code terms aggregation to it as a nested aggregation.
let generateNewDateHistogramByMinute () =
    // Nested aggregation: inside every minute bucket, split again by result code.
    let subAggregations = AggregationDictionary()
    subAggregations.[resultInnerAggregationName] <- AggregationContainer(Terms = generateAggregationByResultCode ())

    DateHistogramAggregation("by_minute",
                             Field = Field(Name = "OperationTime"),
                             Interval = Union<DateInterval, Time>(DateInterval.Minute), // can also use TimeSpan.FromMinutes(1.0)
                             MinimumDocumentCount = Nullable<int>(0),
                             Format = "strict_date_hour_minute",
                             Aggregations = subAggregations)

我用这个聚合来设置请求

// Build the top-level aggregation dictionary and attach it to the search request.
let dateHistogram = generateNewDateHistogramByMinute ()
let aggregations = new AggregationDictionary()
// NOTE(review): the container is created directly from the DateHistogram property.
// Presumably the nested aggregations must also be assigned to the container itself
// (or the container obtained through the implicit conversion from the aggregation)
// for them to be serialized — confirm against the NEST AggregationContainer source.
aggregations.[histogramName] <- new AggregationContainer(DateHistogram = dateHistogram)
(* ... code omitted ... *)
dslRequest.Aggregations <- aggregations

当我打印出请求时,聚合部分是这样的

"aggs": {
    "BucketsByMinutes": {
      "date_histogram": {
        "field": "OperationTime",
        "interval": "minute",
        "format": "strict_date_hour_minute",
        "min_doc_count": 0
      }
    }
  }

内部聚合完全丢失。有谁知道我应该如何正确构建请求?返回响应时如何检索内部存储桶?我没有找到合适的属性或方法,而且文档基本上不存在。

我不确定您为什么没有在请求中看到内部聚合;使用下面这个对您的代码略作修改的版本,我可以在请求中看到它

open System
open System.Text
open Nest
open Elasticsearch.Net

/// Minimal document type used as the generic argument of the search request.
type Document () =
    // Backing storage for Name; starts out as the empty string.
    let mutable name = ""

    /// Readable and writable string property.
    member this.Name
        with get () = name
        and set (value: string) = name <- value

// Connection setup: a single-node pool combined with an in-memory connection.
let pool = new SingleNodeConnectionPool(Uri("http://localhost:9200"))
let settings = new ConnectionSettings(pool, new InMemoryConnection())

// Register a callback that writes each completed call to the console:
// first the request (method, URI and body when present), then the response
// status (and body when present) followed by a line of dashes.
settings.DisableDirectStreaming()
        .PrettyJson()
        .OnRequestCompleted(fun call ->
            let requestBytes = call.RequestBodyInBytes
            if isNull requestBytes then
                Console.WriteLine("{0} {1} \n", call.HttpMethod, call.Uri)
            else
                Console.WriteLine("{0} {1} \n{2}\n", call.HttpMethod, call.Uri, Encoding.UTF8.GetString(requestBytes))

            let responseBytes = call.ResponseBodyInBytes
            let divider = String('-', 30)
            if isNull responseBytes then
                Console.WriteLine("Status: {0}\n{1}\n", call.HttpStatusCode, divider)
            else
                Console.WriteLine("Status: {0}\n{1}\n{2}\n", call.HttpStatusCode, Encoding.UTF8.GetString(responseBytes), divider)
        ) |> ignore

let client = new ElasticClient(settings)

/// Builds the terms aggregation named "result_aggregation" that groups
/// documents by their "Result" field, limited to 10 buckets.
let generateAggregationByResultCode () =
    let maxBuckets = 10
    let executionHint =
        Nullable<TermsAggregationExecutionHint>(TermsAggregationExecutionHint.GlobalOrdinals)
    TermsAggregation("result_aggregation",
                     // The conversion operator is called explicitly to turn the string into a Field.
                     Field = Field.op_Implicit("Result"),
                     ExecutionHint = executionHint,
                     MinimumDocumentCount = Nullable<int>(0),
                     Size = Nullable<int>(maxBuckets),
                     // Value substituted for documents that have no "Result" field.
                     Missing = "-128")

/// Builds the "by_minute" date histogram over "OperationTime" with the
/// result-code terms aggregation nested inside it.
let generateNewDateHistogramByMinute () =
    // Convert the terms aggregation into an aggregation dictionary via the
    // conversion operator, called explicitly.
    let subAggregations = AggregationDictionary.op_Implicit(generateAggregationByResultCode())

    DateHistogramAggregation("by_minute",
                             Field = Field.op_Implicit("OperationTime"),
                             Interval = Union<DateInterval, Time>(DateInterval.Minute), // can also use TimeSpan.FromMinutes(1.0)
                             MinimumDocumentCount = Nullable<int>(0),
                             Format = "strict_date_hour_minute",
                             Aggregations = subAggregations)

// Create the search request with the date histogram (converted to an
// aggregation dictionary) as its aggregations, then execute it.
let request =
    SearchRequest<Document>(Aggregations = AggregationDictionary.op_Implicit(generateNewDateHistogramByMinute()))

let response = client.Search<Document>(request)

这会在控制台中产生以下内容

POST http://localhost:9200/_search?pretty=true 
{
  "aggs": {
    "by_minute": {
      "date_histogram": {
        "field": "OperationTime",
        "interval": "minute",
        "format": "strict_date_hour_minute",
        "min_doc_count": 0
      },
      "aggs": {
        "result_aggregation": {
          "terms": {
            "field": "Result",
            "size": 10,
            "min_doc_count": 0,
            "execution_hint": "global_ordinals",
            "missing": "-128"
          }
        }
      }
    }
  }
}

Status: 200
------------------------------

以上内容可能对您的开发有用;当您准备好针对 Elasticsearch 实际执行时,请从 ConnectionSettings 构造函数中删除 InMemoryConnection,并删除 ConnectionSettings 上对 .DisableDirectStreaming()、.PrettyJson() 和 .OnRequestCompleted(fun) 的调用。