ElasticSearch - 包含文档详细信息的聚合
ElasticSearch - Aggregations with document details
我需要汇总以下文件:
{
"title": "American Psycho",
"releaseDate": "7/06/2000",
"imdbRate": "7.6",
"casting": [
{
"name": "Christian Bale",
"category": "Actor"
},
{
"name": "Justin Theroux",
"category": "Actor"
}
]
}
{
"title": "The Dark Knight",
"releaseDate": "13/08/2008",
"imdbRate": "9.0",
"casting": [
{
"name": "Christian Bale",
"category": "Actor"
},
{
"name": "Morgan Freeman",
"category": "Actor"
}
]
}
by actor,想得到以下结构:
[
{"name": "Christian Bale"},
{"movies": [
{
"title": "American Psycho",
"releaseDate": "7/06/2000",
"imdbRate": "7.6"
},
{
"title": "The Dark Knight",
"releaseDate": "13/08/2008",
"imdbRate": "9.0"
}, ...
]
除了使用基于casting.name字段的标准术语聚合外,如何检索相关文档的releaseDate和imdbRate?
对于每个演员,我还需要按 releaseDate asc 排序的电影。
我可以使用一个请求执行此操作吗?
由于文档中有一个 casting
对象数组,因此您需要在映射中使用嵌套类型。要获得所需的聚合,您需要 Terms Aggregations, Nested Aggregations and Reverse Nested Aggregations 的组合。下面是一个例子。
使用映射创建索引:
POST /test
{
"mappings": {
"movie": {
"properties": {
"title": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"releaseDate": {
"type": "string",
"index": "not_analyzed"
},
"casting": {
"type": "nested",
"properties": {
"name": {
"type": "string",
"fields":{
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"category": {
"type": "string",
"fields":{
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
}
}
}
索引文档:
POST /test/movie/1
{
"title": "American Psycho",
"releaseDate": "7/06/2000",
"imdbRate": "7.6",
"casting": [
{
"name": "Christian Bale",
"category": "Actor"
},
{
"name": "Justin Theroux",
"category": "Actor"
}
]
}
POST /test/movie/2
{
"title": "The Dark Knight",
"releaseDate": "13/08/2008",
"imdbRate": "9.0",
"casting": [
{
"name": "Christian Bale",
"category": "Actor"
},
{
"name": "Morgan Freeman",
"category": "Actor"
}
]
}
最后搜索:
POST /test/movie/_search?search_type=count
{
"aggs": {
"nested_path": {
"nested": {
"path": "casting"
},
"aggs": {
"actor_name": {
"terms": {
"field": "casting.name.raw"
},
"aggs": {
"movies": {
"reverse_nested": {},
"aggs": {
"movie_title": {
"terms": {
"field": "title.raw"
},
"aggs": {
"release_date": {
"terms": {
"field": "releaseDate"
}
},
"imdbRate_date": {
"terms": {
"field": "imdbRate"
}
}
}
}
}
}
}
}
}
}
}
}
Christian Bale 的回复是:
{
"key": "Christian Bale",
"doc_count": 2,
"movies": {
"doc_count": 2,
"movie_title": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "American Psycho",
"doc_count": 1,
"release_date": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "7/06/2000",
"doc_count": 1
}
]
},
"imdbRate_date": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "7.6",
"doc_count": 1
}
]
}
},
{
"key": "The Dark Knight",
"doc_count": 1,
"release_date": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "13/08/2008",
"doc_count": 1
}
]
},
"imdbRate_date": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "9.0",
"doc_count": 1
}
]
}
}
]
}
}
}
我需要汇总以下文件:
{
"title": "American Psycho",
"releaseDate": "7/06/2000",
"imdbRate": "7.6",
"casting": [
{
"name": "Christian Bale",
"category": "Actor"
},
{
"name": "Justin Theroux",
"category": "Actor"
}
]
}
{
"title": "The Dark Knight",
"releaseDate": "13/08/2008",
"imdbRate": "9.0",
"casting": [
{
"name": "Christian Bale",
"category": "Actor"
},
{
"name": "Morgan Freeman",
"category": "Actor"
}
]
}
by actor,想得到以下结构:
[
{"name": "Christian Bale"},
{"movies": [
{
"title": "American Psycho",
"releaseDate": "7/06/2000",
"imdbRate": "7.6"
},
{
"title": "The Dark Knight",
"releaseDate": "13/08/2008",
"imdbRate": "9.0"
}, ...
]
除了使用基于casting.name字段的标准术语聚合外,如何检索相关文档的releaseDate和imdbRate? 对于每个演员,我还需要按 releaseDate asc 排序的电影。
我可以使用一个请求执行此操作吗?
由于文档中有一个 casting
对象数组,因此您需要在映射中使用嵌套类型。要获得所需的聚合,您需要 Terms Aggregations, Nested Aggregations and Reverse Nested Aggregations 的组合。下面是一个例子。
使用映射创建索引:
POST /test
{
"mappings": {
"movie": {
"properties": {
"title": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"releaseDate": {
"type": "string",
"index": "not_analyzed"
},
"casting": {
"type": "nested",
"properties": {
"name": {
"type": "string",
"fields":{
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"category": {
"type": "string",
"fields":{
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
}
}
}
索引文档:
POST /test/movie/1
{
"title": "American Psycho",
"releaseDate": "7/06/2000",
"imdbRate": "7.6",
"casting": [
{
"name": "Christian Bale",
"category": "Actor"
},
{
"name": "Justin Theroux",
"category": "Actor"
}
]
}
POST /test/movie/2
{
"title": "The Dark Knight",
"releaseDate": "13/08/2008",
"imdbRate": "9.0",
"casting": [
{
"name": "Christian Bale",
"category": "Actor"
},
{
"name": "Morgan Freeman",
"category": "Actor"
}
]
}
最后搜索:
POST /test/movie/_search?search_type=count
{
"aggs": {
"nested_path": {
"nested": {
"path": "casting"
},
"aggs": {
"actor_name": {
"terms": {
"field": "casting.name.raw"
},
"aggs": {
"movies": {
"reverse_nested": {},
"aggs": {
"movie_title": {
"terms": {
"field": "title.raw"
},
"aggs": {
"release_date": {
"terms": {
"field": "releaseDate"
}
},
"imdbRate_date": {
"terms": {
"field": "imdbRate"
}
}
}
}
}
}
}
}
}
}
}
}
Christian Bale 的回复是:
{
"key": "Christian Bale",
"doc_count": 2,
"movies": {
"doc_count": 2,
"movie_title": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "American Psycho",
"doc_count": 1,
"release_date": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "7/06/2000",
"doc_count": 1
}
]
},
"imdbRate_date": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "7.6",
"doc_count": 1
}
]
}
},
{
"key": "The Dark Knight",
"doc_count": 1,
"release_date": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "13/08/2008",
"doc_count": 1
}
]
},
"imdbRate_date": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "9.0",
"doc_count": 1
}
]
}
}
]
}
}
}