Elasticsearch:如何使用嵌套过滤从嵌套文档聚合到每个主文档?

Elasticsearch: How to aggregate from nested documents up to each main document with nested filtering?

是否有一种语法,可以在遵守嵌套过滤条件的前提下,把嵌套文档聚合到每个主文档上?例如,在下面的示例中,我该如何为每支球队找出至少有 2 年经验的球员中最年轻的那一位?我使用的是 Elasticsearch 7.10。

# Create the `sample` index with a strict mapping: a keyword `teamId`
# plus a nested `members` field (memberId, age, experience).
PUT sample
{
  "mappings": {
    "dynamic": "strict",
    "properties": {
      "teamId": {
        "type": "keyword",
        "index": true,
        "doc_values": true
      },
      "members": {
        "type": "nested",
        "properties": {
          "memberId": {
            "type": "keyword",
            "index": true,
            "doc_values": true
          },
          "age": {
            "type": "integer",
            "index": true,
            "doc_values": true
          },
          "experience": {
            "type": "integer",
            "index": true,
            "doc_values": true
          }
        }
      }
    }
  }
}

# Team A with three nested members.
# Note: age and experience are sent as JSON strings in this request.
PUT sample/_doc/1
{
  "teamId": "A",
  "members": [
    { "memberId": "A1", "age": "11", "experience": "1" },
    { "memberId": "A2", "age": "21", "experience": "2" },
    { "memberId": "A3", "age": "31", "experience": "3" }
  ]
}
                                           
# Team B with four nested members.
PUT sample/_doc/2
{
  "teamId": "B",
  "members": [
    { "memberId": "B1", "age": "12", "experience": "1" },
    { "memberId": "B2", "age": "22", "experience": "2" },
    { "memberId": "B3", "age": "32", "experience": "3" },
    { "memberId": "B4", "age": "42", "experience": "4" }
  ]
}
                                           
                                           
# Team C with three nested members.
PUT sample/_doc/3
{
  "teamId": "C",
  "members": [
    { "memberId": "C1", "age": "13", "experience": "1" },
    { "memberId": "C2", "age": "23", "experience": "2" },
    { "memberId": "C3", "age": "33", "experience": "3" }
  ]
}

在不加过滤的情况下,我可以让它正常工作,如下所示:

# Per team (terms on teamId): min and max of members.age over all nested members, no filtering.
POST sample/_search?filter_path=aggregations.teams.buckets.key,aggregations.teams.buckets.members.*.value,aggregations.teams.buckets.members.*.hits.hits._source.*
{
  "aggs": {
    "teams": {
      "terms": { "field": "teamId", "size": 10 },
      "aggs": {
        "members": {
          "nested": { "path": "members" },
          "aggs": {
            "min_age": { "min": { "field": "members.age" } },
            "max_age": { "max": { "field": "members.age" } }
          }
        }
      }
    }
  }
}

但是当我像下面这样添加过滤条件时,inner_hits 能够被正确过滤,而 aggs 却没有应用该过滤条件。有没有办法让 aggs 也遵守嵌套过滤器?

# Same per-team min/max aggregation, now combined with a nested query filter
# (members.experience >= 3) that also returns the matching members as inner_hits.
POST sample/_search?filter_path=aggregations.teams.buckets.key,aggregations.teams.buckets.members.*.value,aggregations.teams.buckets.members.*.hits.hits._source.*,hits.hits.inner_hits.members.hits.hits._source.*
{
  "aggs": {
    "teams": {
      "terms": { "field": "teamId", "size": 10 },
      "aggs": {
        "members": {
          "nested": { "path": "members" },
          "aggs": {
            "min_age": { "min": { "field": "members.age" } },
            "max_age": { "max": { "field": "members.age" } }
          }
        }
      }
    }
  },
  "query": {
    "bool": {
      "filter": [
        {
          "nested": {
            "path": "members",
            "query": {
              "bool": {
                "must": [
                  { "range": { "members.experience": { "gte": 3 } } }
                ]
              }
            },
            "inner_hits": {
              "size": 100,
              "sort": [
                { "members.memberId": { "order": "asc" } }
              ]
            }
          }
        }
      ]
    }
  }
}

上面的输出:

{
  "hits" : {
    "hits" : [
      {
        "inner_hits" : {
          "members" : {
            "hits" : {
              "hits" : [
                {
                  "_source" : {
                    "experience" : "3",
                    "age" : "33",
                    "memberId" : "C3"
                  }
                }
              ]
            }
          }
        }
      },
      {
        "inner_hits" : {
          "members" : {
            "hits" : {
              "hits" : [
                {
                  "_source" : {
                    "experience" : "3",
                    "age" : "32",
                    "memberId" : "B3"
                  }
                },
                {
                  "_source" : {
                    "experience" : "4",
                    "age" : "42",
                    "memberId" : "B4"
                  }
                }
              ]
            }
          }
        }
      },
      {
        "inner_hits" : {
          "members" : {
            "hits" : {
              "hits" : [
                {
                  "_source" : {
                    "experience" : "3",
                    "age" : "31",
                    "memberId" : "A3"
                  }
                }
              ]
            }
          }
        }
      }
    ]
  },
  "aggregations" : {
    "teams" : {
      "buckets" : [
        {
          "key" : "A",
          "members" : {
            "max_age" : {
              "value" : 31.0
            },
            "min_age" : {
              "value" : 11.0
            }
          }
        },
        {
          "key" : "B",
          "members" : {
            "max_age" : {
              "value" : 42.0
            },
            "min_age" : {
              "value" : 12.0
            }
          }
        },
        {
          "key" : "C",
          "members" : {
            "max_age" : {
              "value" : 33.0
            },
            "min_age" : {
              "value" : 13.0
            }
          }
        }
      ]
    }
  }
}

当您在查询中使用嵌套过滤器时,Elasticsearch 只会据此筛选主文档并对它们进行聚合,但在聚合时并不会过滤嵌套文档。因此您需要把相同的过滤条件也添加到聚合中。下面的查询应该可以满足您的需求。

# Repeat the nested query's range condition (members.experience >= 3) as a
# `filter` aggregation inside the `nested` aggregation, so that min_age/max_age
# are computed only over the nested members that satisfy it.
POST sample/_search
{
  "aggs": {
    "teams": {
      "terms": {
        "field": "teamId",
        "size": 10
      },
      "aggs": {
        "members": {
          "nested": {
            "path": "members"
          },
          "aggs": {
            "filtered": {
              "filter": {
                "range": {
                  "members.experience": {
                    "gte": 3
                  }
                }
              },
              "aggs": {
                "min_age": {
                  "min": {
                    "field": "members.age"
                  }
                },
                "max_age": {
                  "max": {
                    "field": "members.age"
                  }
                }
              }
            }
          }
        }
      }
    }
  },
  "query": {
    "bool": {
      "filter": [
        {
          "nested": {
            "path": "members",
            "query": {
              "bool": {
                "must": [
                  {
                    "range": {
                      "members.experience": {
                        "gte": 3
                      }
                    }
                  }
                ]
              }
            },
            "inner_hits": {
              "size": 100,
              "sort": [
                {
                  "members.memberId": {
                    "order": "asc"
                  }
                }
              ]
            }
          }
        }
      ]
    }
  }
}

上面 YD9 的回答很好地解决了所提的问题,但我发现当需要添加更多嵌套条件时,必须对它稍作修改。下面的语法可以做到这一点:需要把 YD9 查询中聚合部分的“filter”写法改为“filter/bool/filter”写法。

# Per team: youngest member (min of members.age) among the nested members that
# satisfy the filter conditions.
# The clauses in the aggregation's bool/filter array must match the clauses in
# the nested query's bool/must array further down; to add more criteria, append
# them to the end of BOTH arrays.
GET sample/_search?filter_path=hits.total.value,aggregations.teams.buckets.key,aggregations.teams.buckets.members.*.*.value,hits.hits.inner_hits.members.hits.hits._source.*
{
  "aggs": {
    "teams": {
      "terms": {
        "field": "teamId",
        "size": 10
      },
      "aggs": {
        "members": {
          "nested": {
            "path": "members"
          },
          "aggs": {
            "filtered": {
              "filter": {
                "bool": {
                  "filter": [
                    { "match_all": {} },
                    { "range": { "members.experience": { "gte": 2 } } }
                  ]
                }
              },
              "aggs": {
                "min_age": { "min": { "field": "members.age" } }
              }
            }
          }
        }
      }
    }
  },
  "query": {
    "bool": {
      "filter": [
        {
          "nested": {
            "path": "members",
            "query": {
              "bool": {
                "must": [
                  { "match_all": {} },
                  { "range": { "members.experience": { "gte": 2 } } }
                ]
              }
            },
            "inner_hits": {
              "size": 100,
              "sort": [
                {
                  "members.memberId": {
                    "order": "asc"
                  }
                }
              ]
            }
          }
        }
      ]
    }
  }
}