聚合的进一步过滤
Further filtering of aggregations
我对 Elasticsearch 中的聚合有疑问。我有如下文档:
{
"_index": "products",
"_type": "product",
"_id": "ID-12345",
"_score": 1,
"_source": {
"created_at": "2017-08-04T17:56:44.592Z",
"updated_at": "2017-08-04T17:56:44.592Z",
"product_information": {
"sku": "12345",
"name": "Product Name",
"price": 25,
"brand": "Brand Name",
"url": "URL"
},
"product_detail": {
"description": "Product description text here.",
"string_facets": [
{
"facet_name": "Colour",
"facet_value": "Grey"
},
{
"facet_name": "Category",
"facet_value": "Linen"
},
{
"facet_name": "Category",
"facet_value": "Throws & Blanket"
},
{
"facet_name": "Keyword",
"facet_value": "Contemporary"
},
{
"facet_name": "Keyword",
"facet_value": "Sophisticated"
}
]
}
}
}
我正在 product_detail.string_facets 字段中存储颜色、材质、类别和关键字等产品信息。我想将其用于聚合以获得 Colour/Material/Category/Keyword 建议,但要放在各自独立的存储桶中,即 product_detail.string_facets.facet_name 中定义的每种 string_facet 类型都有一个单独的存储桶。
这是我目前使用的查询,它能返回数据,但结果与我预期的不同。首先是查询(这只是为了尝试获取颜色):
{
"from": 0,
"size": 12,
"query": {
"bool": {
"should": [
{
"multi_match": {
"query": "Rug",
"fields": ["product_information.name", "product_detail.string_facets.facet_value"]
}
},
{
"multi_match": {
"query": "Blue",
"fields": ["product_information.name", "product_detail.string_facets.facet_name"]
}
}
],
"minimum_should_match": "100%"
}
},
"aggs": {
"suggestions": {
"filter": { "term": { "product_detail.string_facets.facet_name.keyword": "Colour" }},
"aggs": {
"colours": {
"terms": {
"field": "product_detail.string_facets.facet_value.keyword",
"size": 10
}
}
}
}
}
}
这给我如下输出:
"aggregations": {
"suggestions": {
"doc_count": 21,
"colours": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 23,
"buckets": [
{
"key": "Rug",
"doc_count": 21
},
{
"key": "Blue",
"doc_count": 18
},
{
"key": "Bold",
"doc_count": 7
},
{
"key": "Modern",
"doc_count": 6
},
{
"key": "Multi-Coloured",
"doc_count": 5
},
{
"key": "Contemporary",
"doc_count": 4
},
{
"key": "Traditional",
"doc_count": 4
},
{
"key": "White",
"doc_count": 4
},
{
"key": "Luxurious",
"doc_count": 3
},
{
"key": "Minimal",
"doc_count": 3
}
]
}
}
}
它返回了所有 facet_name 的结果,而不是我预期的仅 facet_name 为 Colour(颜色)的结果。
如有任何帮助,我们将不胜感激。 Elasticsearch 看起来很强大,但是文档却让人望而生畏!
你没有给出映射的样子,但我猜 product_detail.string_facets 字段只是一个普通的内部对象(object)字段,这就是你得到这种结果的原因。使用这种类型的映射时,Elasticsearch 会将对象数组扁平化为字段名称和值的简单列表。在你的情况下它变成:
{
"product_detail.string_facets.facet_name": ["Colour", "Category", "Keyword"],
"product_detail.string_facets.facet_value": ["Grey", "Linen", "Throws & Blanket", "Contemporary", "Sophisticated"]
}
如您所见,在这种结构中 facet_name 与 facet_value 之间的对应关系已经丢失,Elasticsearch 无法知道哪个值属于哪个名称,因此也就无法先按名称过滤再正确地聚合数据。
要使其正常工作,product_detail.string_facets 字段应为 nested 类型。string_facets 的映射应该与此类似(注意 "type": "nested"):
"string_facets": {
"type": "nested",
"properties": {
"facet_name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"facet_value": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
现在我索引以下文档:
{
"created_at": "2017-08-04T17:56:44.592Z",
"updated_at": "2017-08-04T17:56:44.592Z",
"product_information": {
"sku": "12345",
"name": "Rug",
"price": 25,
"brand": "Brand Name",
"url": "URL"
},
"product_detail": {
"description": "Product description text here.",
"string_facets": [
{
"facet_name": "Colour",
"facet_value": "Blue"
},
{
"facet_name": "Colour",
"facet_value": "Red"
},
{
"facet_name": "Category",
"facet_value": "Throws & Blanket"
},
{
"facet_name": "Keyword",
"facet_value": "Contemporary"
}
]
}
}
现在,要将颜色建议聚合到单独的存储桶中,您可以尝试此查询(我根据上面这个文档的需要简化了 bool 查询):
{
"from": 0,
"size": 12,
"query": {
"bool": {
"should": [
{
"multi_match": {
"query": "Rug",
"fields": ["product_information.name", "product_detail.string_facets.facet_value"]
}
}
]
}
},
"aggs": {
"facets": {
"nested" : {
"path" : "product_detail.string_facets"
},
"aggs": {
"suggestions": {
"filter": { "term": { "product_detail.string_facets.facet_name.keyword": "Colour" }},
"aggs": {
"colours": {
"terms": {
"field": "product_detail.string_facets.facet_value.keyword",
"size": 10
}
}
}
}
}
}
}
}
结果:
{
...,
"hits": {
...
},
"aggregations": {
"facets": {
"doc_count": 5,
"suggestions": {
"doc_count": 2,
"colours": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Blue",
"doc_count": 1
},
{
"key": "Red",
"doc_count": 1
}
]
}
}
}
}
}
我对 Elasticsearch 中的聚合有疑问。我有如下文档:
{
"_index": "products",
"_type": "product",
"_id": "ID-12345",
"_score": 1,
"_source": {
"created_at": "2017-08-04T17:56:44.592Z",
"updated_at": "2017-08-04T17:56:44.592Z",
"product_information": {
"sku": "12345",
"name": "Product Name",
"price": 25,
"brand": "Brand Name",
"url": "URL"
},
"product_detail": {
"description": "Product description text here.",
"string_facets": [
{
"facet_name": "Colour",
"facet_value": "Grey"
},
{
"facet_name": "Category",
"facet_value": "Linen"
},
{
"facet_name": "Category",
"facet_value": "Throws & Blanket"
},
{
"facet_name": "Keyword",
"facet_value": "Contemporary"
},
{
"facet_name": "Keyword",
"facet_value": "Sophisticated"
}
]
}
}
}
我正在 product_detail.string_facets 字段中存储颜色、材质、类别和关键字等产品信息。我想将其用于聚合以获得 Colour/Material/Category/Keyword 建议,但要放在各自独立的存储桶中,即 product_detail.string_facets.facet_name 中定义的每种 string_facet 类型都有一个单独的存储桶。
这是我目前使用的查询,它能返回数据,但结果与我预期的不同。首先是查询(这只是为了尝试获取颜色):
{
"from": 0,
"size": 12,
"query": {
"bool": {
"should": [
{
"multi_match": {
"query": "Rug",
"fields": ["product_information.name", "product_detail.string_facets.facet_value"]
}
},
{
"multi_match": {
"query": "Blue",
"fields": ["product_information.name", "product_detail.string_facets.facet_name"]
}
}
],
"minimum_should_match": "100%"
}
},
"aggs": {
"suggestions": {
"filter": { "term": { "product_detail.string_facets.facet_name.keyword": "Colour" }},
"aggs": {
"colours": {
"terms": {
"field": "product_detail.string_facets.facet_value.keyword",
"size": 10
}
}
}
}
}
}
这给我如下输出:
"aggregations": {
"suggestions": {
"doc_count": 21,
"colours": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 23,
"buckets": [
{
"key": "Rug",
"doc_count": 21
},
{
"key": "Blue",
"doc_count": 18
},
{
"key": "Bold",
"doc_count": 7
},
{
"key": "Modern",
"doc_count": 6
},
{
"key": "Multi-Coloured",
"doc_count": 5
},
{
"key": "Contemporary",
"doc_count": 4
},
{
"key": "Traditional",
"doc_count": 4
},
{
"key": "White",
"doc_count": 4
},
{
"key": "Luxurious",
"doc_count": 3
},
{
"key": "Minimal",
"doc_count": 3
}
]
}
}
}
它返回了所有 facet_name 的结果,而不是我预期的仅 facet_name 为 Colour(颜色)的结果。
如有任何帮助,我们将不胜感激。 Elasticsearch 看起来很强大,但是文档却让人望而生畏!
你没有给出映射的样子,但我猜 product_detail.string_facets 字段只是一个普通的内部对象(object)字段,这就是你得到这种结果的原因。使用这种类型的映射时,Elasticsearch 会将对象数组扁平化为字段名称和值的简单列表。在你的情况下它变成:
{
"product_detail.string_facets.facet_name": ["Colour", "Category", "Keyword"],
"product_detail.string_facets.facet_value": ["Grey", "Linen", "Throws & Blanket", "Contemporary", "Sophisticated"]
}
如您所见,在这种结构中 facet_name 与 facet_value 之间的对应关系已经丢失,Elasticsearch 无法知道哪个值属于哪个名称,因此也就无法先按名称过滤再正确地聚合数据。
要使其正常工作,product_detail.string_facets 字段应为 nested 类型。string_facets 的映射应该与此类似(注意 "type": "nested"):
"string_facets": {
"type": "nested",
"properties": {
"facet_name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"facet_value": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
现在我索引以下文档:
{
"created_at": "2017-08-04T17:56:44.592Z",
"updated_at": "2017-08-04T17:56:44.592Z",
"product_information": {
"sku": "12345",
"name": "Rug",
"price": 25,
"brand": "Brand Name",
"url": "URL"
},
"product_detail": {
"description": "Product description text here.",
"string_facets": [
{
"facet_name": "Colour",
"facet_value": "Blue"
},
{
"facet_name": "Colour",
"facet_value": "Red"
},
{
"facet_name": "Category",
"facet_value": "Throws & Blanket"
},
{
"facet_name": "Keyword",
"facet_value": "Contemporary"
}
]
}
}
现在,要将颜色建议聚合到单独的存储桶中,您可以尝试此查询(我根据上面这个文档的需要简化了 bool 查询):
{
"from": 0,
"size": 12,
"query": {
"bool": {
"should": [
{
"multi_match": {
"query": "Rug",
"fields": ["product_information.name", "product_detail.string_facets.facet_value"]
}
}
]
}
},
"aggs": {
"facets": {
"nested" : {
"path" : "product_detail.string_facets"
},
"aggs": {
"suggestions": {
"filter": { "term": { "product_detail.string_facets.facet_name.keyword": "Colour" }},
"aggs": {
"colours": {
"terms": {
"field": "product_detail.string_facets.facet_value.keyword",
"size": 10
}
}
}
}
}
}
}
}
结果:
{
...,
"hits": {
...
},
"aggregations": {
"facets": {
"doc_count": 5,
"suggestions": {
"doc_count": 2,
"colours": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Blue",
"doc_count": 1
},
{
"key": "Red",
"doc_count": 1
}
]
}
}
}
}
}