Elasticsearch:如何使用嵌套过滤从嵌套文档聚合到每个主文档?
Elasticsearch: How to aggregate from nested documents up to each main document with nested filtering?
是否有一种语法,可以在遵守嵌套过滤条件的前提下,把嵌套文档按每个主文档分别聚合?例如,在下面的示例中,我该如何取得每支球队中至少有 2 年经验的最年轻球员?我正在使用 Elasticsearch 7.10。
PUT sample
{
  "mappings": {
    "dynamic": "strict",
    "properties": {
      "teamId": {
        "type": "keyword",
        "index": true,
        "doc_values": true
      },
      "members": {
        "type": "nested",
        "properties": {
          "memberId": {
            "type": "keyword",
            "index": true,
            "doc_values": true
          },
          "age": {
            "type": "integer",
            "index": true,
            "doc_values": true
          },
          "experience": {
            "type": "integer",
            "index": true,
            "doc_values": true
          }
        }
      }
    }
  }
}
PUT sample/_doc/1
{
  "teamId": "A",
  "members": [
    { "memberId": "A1", "age": "11", "experience": "1" },
    { "memberId": "A2", "age": "21", "experience": "2" },
    { "memberId": "A3", "age": "31", "experience": "3" }
  ]
}
PUT sample/_doc/2
{
  "teamId": "B",
  "members": [
    { "memberId": "B1", "age": "12", "experience": "1" },
    { "memberId": "B2", "age": "22", "experience": "2" },
    { "memberId": "B3", "age": "32", "experience": "3" },
    { "memberId": "B4", "age": "42", "experience": "4" }
  ]
}
PUT sample/_doc/3
{
  "teamId": "C",
  "members": [
    { "memberId": "C1", "age": "13", "experience": "1" },
    { "memberId": "C2", "age": "23", "experience": "2" },
    { "memberId": "C3", "age": "33", "experience": "3" }
  ]
}
在不加过滤的情况下,我可以让它正常工作,如下所示:
POST sample/_search?filter_path=aggregations.teams.buckets.key,aggregations.teams.buckets.members.*.value,aggregations.teams.buckets.members.*.hits.hits._source.*
{
  "aggs": {
    "teams": {
      "terms": { "field": "teamId", "size": 10 },
      "aggs": {
        "members": {
          "nested": { "path": "members" },
          "aggs": {
            "min_age": { "min": { "field": "members.age" } },
            "max_age": { "max": { "field": "members.age" } }
          }
        }
      }
    }
  }
}
但是,当我像下面这样添加过滤条件时,inner_hits 能被正确过滤,而 aggs 却没有应用该过滤。有没有办法让 aggs 也遵守嵌套过滤器?
POST sample/_search?filter_path=aggregations.teams.buckets.key,aggregations.teams.buckets.members.*.value,aggregations.teams.buckets.members.*.hits.hits._source.*,hits.hits.inner_hits.members.hits.hits._source.*
{
  "aggs": {
    "teams": {
      "terms": { "field": "teamId", "size": 10 },
      "aggs": {
        "members": {
          "nested": { "path": "members" },
          "aggs": {
            "min_age": { "min": { "field": "members.age" } },
            "max_age": { "max": { "field": "members.age" } }
          }
        }
      }
    }
  },
  "query": {
    "bool": {
      "filter": [
        {
          "nested": {
            "path": "members",
            "query": {
              "bool": {
                "must": [
                  { "range": { "members.experience": { "gte": 3 } } }
                ]
              }
            },
            "inner_hits": {
              "size": 100,
              "sort": [
                { "members.memberId": { "order": "asc" } }
              ]
            }
          }
        }
      ]
    }
  }
}
上面的输出:
{
"hits" : {
"hits" : [
{
"inner_hits" : {
"members" : {
"hits" : {
"hits" : [
{
"_source" : {
"experience" : "3",
"age" : "33",
"memberId" : "C3"
}
}
]
}
}
}
},
{
"inner_hits" : {
"members" : {
"hits" : {
"hits" : [
{
"_source" : {
"experience" : "3",
"age" : "32",
"memberId" : "B3"
}
},
{
"_source" : {
"experience" : "4",
"age" : "42",
"memberId" : "B4"
}
}
]
}
}
}
},
{
"inner_hits" : {
"members" : {
"hits" : {
"hits" : [
{
"_source" : {
"experience" : "3",
"age" : "31",
"memberId" : "A3"
}
}
]
}
}
}
}
]
},
"aggregations" : {
"teams" : {
"buckets" : [
{
"key" : "A",
"members" : {
"max_age" : {
"value" : 31.0
},
"min_age" : {
"value" : 11.0
}
}
},
{
"key" : "B",
"members" : {
"max_age" : {
"value" : 42.0
},
"min_age" : {
"value" : 12.0
}
}
},
{
"key" : "C",
"members" : {
"max_age" : {
"value" : 33.0
},
"min_age" : {
"value" : 13.0
}
}
}
]
}
}
}
当您在查询中使用嵌套过滤器时,Elasticsearch 会过滤主文档并对它们进行聚合,但在聚合时不会过滤嵌套文档。您应该把相同的过滤条件也添加到聚合中。下面这个查询应该能满足您的需求。
POST sample/_search?filter_path=aggregations.teams.buckets.key,aggregations.teams.buckets.members.filtered.*.value,hits.hits.inner_hits.members.hits.hits._source.*
{
  "aggs": {
    "teams": {
      "terms": {
        "field": "teamId",
        "size": 10
      },
      "aggs": {
        "members": {
          "nested": {
            "path": "members"
          },
          "aggs": {
            // The top-level query only selects which main documents match; it does
            // not narrow the nested documents seen by the aggregation. This filter
            // aggregation applies the member-level condition before the metrics run.
            // Keep its range identical to the one in the nested query below.
            "filtered": {
              "filter": {
                "range": {
                  "members.experience": {
                    "gte": 3
                  }
                }
              },
              // The metrics sit one level deeper (members.filtered.<metric>), which is
              // why filter_path above includes the extra "filtered" segment.
              "aggs": {
                "min_age": {
                  "min": {
                    "field": "members.age"
                  }
                },
                "max_age": {
                  "max": {
                    "field": "members.age"
                  }
                }
              }
            }
          }
        }
      }
    }
  },
  "query": {
    "bool": {
      "filter": [
        {
          "nested": {
            "path": "members",
            "query": {
              "bool": {
                "must": [
                  {
                    "range": {
                      "members.experience": {
                        "gte": 3
                      }
                    }
                  }
                ]
              }
            },
            "inner_hits": {
              "size": 100,
              "sort": [
                {
                  "members.memberId": {
                    "order": "asc"
                  }
                }
              ]
            }
          }
        }
      ]
    }
  }
}
上面 YD9 的回答很好地解决了所提的问题,但我发现当需要添加更多嵌套条件时,必须稍作修改。下面的语法可以做到这一点:需要把 YD9 查询中的“filter”模式改为“filter/bool/filter”模式。
GET sample/_search?filter_path=hits.total.value,aggregations.teams.buckets.key,aggregations.teams.buckets.members.*.*.value,hits.hits.inner_hits.members.hits.hits._source.*
{
  "aggs": {
    "teams": {
      "terms": { "field": "teamId", "size": 10 },
      "aggs": {
        "members": {
          "nested": { "path": "members" },
          "aggs": {
            "filtered": {
              "filter": {
                "bool": {
                  "filter": [
                    // Keep these clauses identical to the ones in the nested query further down.
                    { "match_all": {} },
                    { "range": { "members.experience": { "gte": 2 } } }
                    // Additional criteria can be appended here.
                  ]
                }
              },
              "aggs": {
                "min_age": { "min": { "field": "members.age" } }
              }
            }
          }
        }
      }
    }
  },
  "query": {
    "bool": {
      "filter": [
        {
          "nested": {
            "path": "members",
            "query": {
              "bool": {
                "must": [
                  { "match_all": {} },
                  { "range": { "members.experience": { "gte": 2 } } }
                  // Additional criteria can be appended here.
                ]
              }
            },
            "inner_hits": {
              "size": 100,
              "sort": [
                { "members.memberId": { "order": "asc" } }
              ]
            }
          }
        }
      ]
    }
  }
}
是否有一种语法,可以在遵守嵌套过滤条件的前提下,把嵌套文档按每个主文档分别聚合?例如,在下面的示例中,我该如何取得每支球队中至少有 2 年经验的最年轻球员?我正在使用 Elasticsearch 7.10。
PUT sample
{
  "mappings": {
    "dynamic": "strict",
    "properties": {
      "teamId": {
        "type": "keyword",
        "index": true,
        "doc_values": true
      },
      "members": {
        "type": "nested",
        "properties": {
          "memberId": {
            "type": "keyword",
            "index": true,
            "doc_values": true
          },
          "age": {
            "type": "integer",
            "index": true,
            "doc_values": true
          },
          "experience": {
            "type": "integer",
            "index": true,
            "doc_values": true
          }
        }
      }
    }
  }
}
PUT sample/_doc/1
{
  "teamId": "A",
  "members": [
    { "memberId": "A1", "age": "11", "experience": "1" },
    { "memberId": "A2", "age": "21", "experience": "2" },
    { "memberId": "A3", "age": "31", "experience": "3" }
  ]
}
PUT sample/_doc/2
{
  "teamId": "B",
  "members": [
    { "memberId": "B1", "age": "12", "experience": "1" },
    { "memberId": "B2", "age": "22", "experience": "2" },
    { "memberId": "B3", "age": "32", "experience": "3" },
    { "memberId": "B4", "age": "42", "experience": "4" }
  ]
}
PUT sample/_doc/3
{
  "teamId": "C",
  "members": [
    { "memberId": "C1", "age": "13", "experience": "1" },
    { "memberId": "C2", "age": "23", "experience": "2" },
    { "memberId": "C3", "age": "33", "experience": "3" }
  ]
}
在不加过滤的情况下,我可以让它正常工作,如下所示:
POST sample/_search?filter_path=aggregations.teams.buckets.key,aggregations.teams.buckets.members.*.value,aggregations.teams.buckets.members.*.hits.hits._source.*
{
  "aggs": {
    "teams": {
      "terms": { "field": "teamId", "size": 10 },
      "aggs": {
        "members": {
          "nested": { "path": "members" },
          "aggs": {
            "min_age": { "min": { "field": "members.age" } },
            "max_age": { "max": { "field": "members.age" } }
          }
        }
      }
    }
  }
}
但是,当我像下面这样添加过滤条件时,inner_hits 能被正确过滤,而 aggs 却没有应用该过滤。有没有办法让 aggs 也遵守嵌套过滤器?
POST sample/_search?filter_path=aggregations.teams.buckets.key,aggregations.teams.buckets.members.*.value,aggregations.teams.buckets.members.*.hits.hits._source.*,hits.hits.inner_hits.members.hits.hits._source.*
{
  "aggs": {
    "teams": {
      "terms": { "field": "teamId", "size": 10 },
      "aggs": {
        "members": {
          "nested": { "path": "members" },
          "aggs": {
            "min_age": { "min": { "field": "members.age" } },
            "max_age": { "max": { "field": "members.age" } }
          }
        }
      }
    }
  },
  "query": {
    "bool": {
      "filter": [
        {
          "nested": {
            "path": "members",
            "query": {
              "bool": {
                "must": [
                  { "range": { "members.experience": { "gte": 3 } } }
                ]
              }
            },
            "inner_hits": {
              "size": 100,
              "sort": [
                { "members.memberId": { "order": "asc" } }
              ]
            }
          }
        }
      ]
    }
  }
}
上面的输出:
{
"hits" : {
"hits" : [
{
"inner_hits" : {
"members" : {
"hits" : {
"hits" : [
{
"_source" : {
"experience" : "3",
"age" : "33",
"memberId" : "C3"
}
}
]
}
}
}
},
{
"inner_hits" : {
"members" : {
"hits" : {
"hits" : [
{
"_source" : {
"experience" : "3",
"age" : "32",
"memberId" : "B3"
}
},
{
"_source" : {
"experience" : "4",
"age" : "42",
"memberId" : "B4"
}
}
]
}
}
}
},
{
"inner_hits" : {
"members" : {
"hits" : {
"hits" : [
{
"_source" : {
"experience" : "3",
"age" : "31",
"memberId" : "A3"
}
}
]
}
}
}
}
]
},
"aggregations" : {
"teams" : {
"buckets" : [
{
"key" : "A",
"members" : {
"max_age" : {
"value" : 31.0
},
"min_age" : {
"value" : 11.0
}
}
},
{
"key" : "B",
"members" : {
"max_age" : {
"value" : 42.0
},
"min_age" : {
"value" : 12.0
}
}
},
{
"key" : "C",
"members" : {
"max_age" : {
"value" : 33.0
},
"min_age" : {
"value" : 13.0
}
}
}
]
}
}
}
当您在查询中使用嵌套过滤器时,Elasticsearch 会过滤主文档并对它们进行聚合,但在聚合时不会过滤嵌套文档。您应该把相同的过滤条件也添加到聚合中。下面这个查询应该能满足您的需求。
POST sample/_search?filter_path=aggregations.teams.buckets.key,aggregations.teams.buckets.members.filtered.*.value,hits.hits.inner_hits.members.hits.hits._source.*
{
  "aggs": {
    "teams": {
      "terms": {
        "field": "teamId",
        "size": 10
      },
      "aggs": {
        "members": {
          "nested": {
            "path": "members"
          },
          "aggs": {
            // The top-level query only selects which main documents match; it does
            // not narrow the nested documents seen by the aggregation. This filter
            // aggregation applies the member-level condition before the metrics run.
            // Keep its range identical to the one in the nested query below.
            "filtered": {
              "filter": {
                "range": {
                  "members.experience": {
                    "gte": 3
                  }
                }
              },
              // The metrics sit one level deeper (members.filtered.<metric>), which is
              // why filter_path above includes the extra "filtered" segment.
              "aggs": {
                "min_age": {
                  "min": {
                    "field": "members.age"
                  }
                },
                "max_age": {
                  "max": {
                    "field": "members.age"
                  }
                }
              }
            }
          }
        }
      }
    }
  },
  "query": {
    "bool": {
      "filter": [
        {
          "nested": {
            "path": "members",
            "query": {
              "bool": {
                "must": [
                  {
                    "range": {
                      "members.experience": {
                        "gte": 3
                      }
                    }
                  }
                ]
              }
            },
            "inner_hits": {
              "size": 100,
              "sort": [
                {
                  "members.memberId": {
                    "order": "asc"
                  }
                }
              ]
            }
          }
        }
      ]
    }
  }
}
上面 YD9 的回答很好地解决了所提的问题,但我发现当需要添加更多嵌套条件时,必须稍作修改。下面的语法可以做到这一点:需要把 YD9 查询中的“filter”模式改为“filter/bool/filter”模式。
GET sample/_search?filter_path=hits.total.value,aggregations.teams.buckets.key,aggregations.teams.buckets.members.*.*.value,hits.hits.inner_hits.members.hits.hits._source.*
{
  "aggs": {
    "teams": {
      "terms": { "field": "teamId", "size": 10 },
      "aggs": {
        "members": {
          "nested": { "path": "members" },
          "aggs": {
            "filtered": {
              "filter": {
                "bool": {
                  "filter": [
                    // Keep these clauses identical to the ones in the nested query further down.
                    { "match_all": {} },
                    { "range": { "members.experience": { "gte": 2 } } }
                    // Additional criteria can be appended here.
                  ]
                }
              },
              "aggs": {
                "min_age": { "min": { "field": "members.age" } }
              }
            }
          }
        }
      }
    }
  },
  "query": {
    "bool": {
      "filter": [
        {
          "nested": {
            "path": "members",
            "query": {
              "bool": {
                "must": [
                  { "match_all": {} },
                  { "range": { "members.experience": { "gte": 2 } } }
                  // Additional criteria can be appended here.
                ]
              }
            },
            "inner_hits": {
              "size": 100,
              "sort": [
                { "members.memberId": { "order": "asc" } }
              ]
            }
          }
        }
      ]
    }
  }
}