在 Elasticsearch 5 上使用过滤器获取嵌套文档
Get nested documents with a filter on Elasticsearch 5
我在 ES 5 中映射了以下文档:
{
"appName" : {
"mappings" : {
"market_audit" : {
"properties" : {
"generation_date": {
"type": "date"
},
"customers" : {
"type" : "nested",
"properties" : {
"customer_id" : {
"type" : "integer"
},
[... other properties ...]
}
"customers" 节点中的多个条目可能具有相同的 customer_id,我试图仅检索具有特定 customer_id(即“1”)的条目与顶级文件的"generation_date"(只处理最新的文件)。
我能够提出以下查询:
{
"query": {},
"sort": [
{ "generation_date": "desc" }
],
"size": 1,
"aggregations": {
"nested": {
"nested": {
"path": "customers"
},
"aggregations": {
"filter": {
"filter": {
"match": {
"customers.customer_id": {
"query": "1"
}
}
},
"aggregations": {
"tophits_agg": {
"top_hits": {}
}
}
}
}
}
}
}
此查询能获取我感兴趣的数据,它位于响应的 "aggregations" 部分(同时返回的 "hits" 数组则包含整个文档)。这里的问题是,我使用的框架(ONGR 的 ElasticSearch bundle 和 DSL bundle,基于 Symfony3)在我每次尝试访问实际数据时都会报错,提示没有可用的桶(bucket)。
我已阅读 ES 文档,但无法提出添加存储桶的有效查询。我确定我遗漏了一些东西,非常欢迎提供一点帮助。如果您对如何适当修改查询有想法,我想我可以想出 PHP 代码来生成它。
编辑:由于这个问题已有一些浏览量但还没有回答(而且我仍然卡住了),我会接受任何能让我从最新生成的文档(根据 "generation_date" 字段)中检索特定 "customer"(通过 customer_id)信息的查询。我给出的查询只是我目前能想到的,我很确定有更好的方法来做到这一点。有什么建议吗?
编辑 2:
这是发送到 ES 的数据:
{
"index": {
"_type": "market_data_audit_document"
}
}
{
"customers": [
{
"customer_id": 1,
"colocation_name": "colo1",
"colocation_id": 26,
"device_name": "device 1",
"channels": [
{
"name": "channel1-5",
"multicast":"1.2.1.5",
"sugar_state":4,
"network_state":1
}
]
},
{
"customer_id":2,
"colocation_name":"colo2",
"colocation_id":27,
"device_name":"device 2",
"channels": [
{
"name":"channel2-5",
"multicast":"1.2.2.5",
"sugar_state":4,
"network_state":1
}
]
},
{
"customer_id":3,
"colocation_name":"colo3",
"colocation_id":28,
"device_name":"device 3",
"channels": [
{
"name":"channel3-5",
"multicast":"1.2.3.5",
"sugar_state":4,
"network_state":1
}
]
},
{
"customer_id":4,
"colocation_name":"colo4",
"colocation_id":29,
"device_name":"device 4"
,"channels": [
{
"name":"channel4-5",
"multicast":"1.2.4.5",
"sugar_state":4,
"network_state":1
}
]
},
{
"customer_id":5,
"colocation_name":"colo5",
"colocation_id":30,
"device_name":"device 5",
"channels": [
{
"name":"channel5-5",
"multicast":"1.2.5.5",
"sugar_state":4,
"network_state":1
}
]
}
],
"generation_date":"2017-02-27T10:55:45+0100"
}
不幸的是,当我尝试发送本帖中列出的查询时,我发现聚合并没有按我预期的方式工作:它返回了 "正确" 的数据,但这些数据来自所有(ALL)已存储的文档!这是一个输出示例:
{
"timed_out" : false,
"took" : 60,
"hits" : {
"total" : 2,
"hits" : [
{
"_source" : {
"customers" : [
{
"colocation_id" : 26,
"channels" : [
{
"name" : "channel1-5",
"sugar_state" : 4,
"network_state" : 1,
"multicast" : "1.2.1.5"
}
],
"customer_id" : 1,
"colocation_name" : "colo1",
"device_name" : "device 1"
},
{
"colocation_id" : 27,
"channels" : [
{
"multicast" : "1.2.2.5",
"network_state" : 1,
"name" : "channel2-5",
"sugar_state" : 4
}
],
"customer_id" : 2,
"device_name" : "device 2",
"colocation_name" : "colo2"
},
{
"device_name" : "device 3",
"colocation_name" : "colo3",
"customer_id" : 3,
"channels" : [
{
"multicast" : "1.2.3.5",
"network_state" : 1,
"sugar_state" : 4,
"name" : "channel3-5"
}
],
"colocation_id" : 28
},
{
"channels" : [
{
"sugar_state" : 4,
"name" : "channel4-5",
"multicast" : "1.2.4.5",
"network_state" : 1
}
],
"customer_id" : 4,
"colocation_id" : 29,
"colocation_name" : "colo4",
"device_name" : "device 4"
},
{
"device_name" : "device 5",
"colocation_name" : "colo5",
"colocation_id" : 30,
"channels" : [
{
"sugar_state" : 4,
"name" : "channel5-5",
"multicast" : "1.2.5.5",
"network_state" : 1
}
],
"customer_id" : 5
}
],
"generation_date" : "2017-02-27T11:45:37+0100"
},
"_type" : "market_data_audit_document",
"sort" : [
1488192337000
],
"_index" : "mars",
"_score" : null,
"_id" : "AVp_LPeJdrvi0cWb8CrL"
}
],
"max_score" : null
},
"aggregations" : {
"nested" : {
"doc_count" : 10,
"filter" : {
"doc_count" : 2,
"tophits_agg" : {
"hits" : {
"max_score" : 1,
"total" : 2,
"hits" : [
{
"_nested" : {
"offset" : 0,
"field" : "customers"
},
"_score" : 1,
"_source" : {
"channels" : [
{
"name" : "channel1-5",
"sugar_state" : 4,
"multicast" : "1.2.1.5",
"network_state" : 1
}
],
"customer_id" : 1,
"colocation_id" : 26,
"colocation_name" : "colo1",
"device_name" : "device 1"
}
},
{
"_source" : {
"colocation_id" : 26,
"customer_id" : 1,
"channels" : [
{
"multicast" : "1.2.1.5",
"network_state" : 1,
"name" : "channel1-5",
"sugar_state" : 4
}
],
"device_name" : "device 1",
"colocation_name" : "colo1"
},
"_nested" : {
"offset" : 0,
"field" : "customers"
},
"_score" : 1
}
]
}
}
}
}
},
"_shards" : {
"total" : 13,
"successful" : 1,
"failures" : [
{
"reason" : {
"index" : ".kibana",
"index_uuid" : "bTkwoysSQ0y8Tt9yYFRStg",
"type" : "query_shard_exception",
"reason" : "No mapping found for [generation_date] in order to sort on"
},
"shard" : 0,
"node" : "4ZUgOm4VRry6EtUK15UH3Q",
"index" : ".kibana"
},
{
"reason" : {
"index_uuid" : "lN2mVF9bRjuDtiBF2qACfA",
"index" : "archiv1_log",
"type" : "query_shard_exception",
"reason" : "No mapping found for [generation_date] in order to sort on"
},
"shard" : 0,
"node" : "4ZUgOm4VRry6EtUK15UH3Q",
"index" : "archiv1_log"
},
{
"index" : "archiv1_session",
"shard" : 0,
"node" : "4ZUgOm4VRry6EtUK15UH3Q",
"reason" : {
"type" : "query_shard_exception",
"index" : "archiv1_session",
"index_uuid" : "cmMAW04YTtCb0khEqHpNyA",
"reason" : "No mapping found for [generation_date] in order to sort on"
}
},
{
"shard" : 0,
"node" : "4ZUgOm4VRry6EtUK15UH3Q",
"reason" : {
"reason" : "No mapping found for [generation_date] in order to sort on",
"index" : "archiv1_users_dev",
"index_uuid" : "AH48gIf5T0CXSQaE7uvVRg",
"type" : "query_shard_exception"
},
"index" : "archiv1_users_dev"
}
],
"failed" : 12
}
}
根据您的描述:
- 你用一堆属性在 elasticsearch 上存储文档
- 每个文档都包含数组中的客户列表(嵌套文档)
- 您只想提取与 customer.id 相关的嵌套文档
- 您的库无法管理没有存储桶的 Elasticsearch 响应
- 您期望 Elasticsearch 返回嵌套文档
问题
聚合有 2 种类型:
- 桶
- 指标
在您的情况下,您在 Nested Agg 下有 2 个聚合:Filter 和 Metric。
过滤器:
- Filter 聚合定义了一个包含所有匹配文档的单一桶,但不会在结果中提供 'bucket' 关键字。
- Top hits 是一个指标(metric)聚合,不提供桶。
解决方法:
我不确定您的 PHP 库能否正确处理嵌套聚合的结果,但您可以使用 Filters 聚合(而不是 Filter 聚合)来获得一个桶列表:
{
"aggregations": {
"nested": {
"nested": {
"path": "customers"
},
"aggregations": {
"filters_customer": {
"filters": {
"filters": [
{
"match": {
"customers.customer_id": "1"
}
}
]
},
"aggregations": {
"top_hits_customer": {
"top_hits": {}
}
}
}
}
}
}
}
将提供如下内容:
{
"aggregations": {
"nested": {
"doc_count": 15,
"filters_customer": {
"buckets": [
{
"doc_count": 3,
"top_hits_customer": {
"hits": {
"total": 3,
"max_score": 1,
"hits": [
{
"_nested": {
"field": "customers",
"offset": 0
},
"_score": 1,
"_source": {
"customer_id": 1,
"foo": "bar"
}
},
{
"_nested": {
"field": "customers",
"offset": 0
},
"_score": 1,
"_source": {
"customer_id": 1,
"foo": "bar"
}
},
{
"_nested": {
"field": "customers",
"offset": 0
},
"_score": 1,
"_source": {
"customer_id": 1,
"foo": "bar"
}
}
]
}
}
}
]
}
}
}
}
关于您的 EDIT 2 的注意事项
Elasticsearch 会在所有文档上执行聚合,而不是只针对按报告日期排序后的 'TOP 1' 文档。按报告拆分结果的一种方法是对报告日期使用 terms 聚合桶:
{
"query": {},
"size": 0,
"aggregations": {
"grp_report": {
"terms": {
"field": "generation_date"
},
"aggregations": {
"nested_customers": {
"nested": {
"path": "customers"
},
"aggregations": {
"filters_customer": {
"filters": {
"filters": [
{
"match": {
"customers.customer_id": "1"
}
}
]
},
"aggregations": {
"top_hits_customer": {
"top_hits": {}
}
}
}
}
}
}
}
}
}
建议:
避免使用复杂的文档,最好将您的报告拆分为带有关联键(例如 reportId)的小文档。这样您无需任何嵌套文档就能轻松地进行过滤和聚合。请在客户文档上添加您要用于过滤的信息,以便对所有类型进行过滤(在这种情况下冗余不是问题)。
用例示例:
- 报告列表
- 根据报告显示客户信息
- 跨多个报告显示客户的历史记录
当前文档示例:/indexName/market_audit
{
"generation_date": "...",
"customers": [
{
"id": 1,
"foo": "bar 1"
},
{
"id": 2,
"foo": "bar 2"
},
{
"id": 3,
"foo": "bar 3"
}
]
}
修改后的文档:
/indexName/market_audit_report
{
"report_id" : "123456"
"generation_date": "...",
"foo":"bar"
}
/indexName/market_audit_customer(客户文档)
{
"report_id" : "123456"
"customer_id": 1,
"foo": "bar 1"
}
{
"report_id" : "123456"
"customer_id": 2,
"foo": "bar 2"
}
{
"report_id" : "123456"
"customer_id": 3,
"foo": "bar 3"
}
如果您知道您的报告 ID,您将能够在一个请求中获取所有数据:
- 报告 ID 过滤器
- 对类型(type)的 terms 聚合
- 类型为报告的过滤器
- 一个 top_hits 聚合,用于获取报告
- Filter 聚合,只获取类型为客户且客户 ID 为 1 的文档
- 一个 top_hits 聚合,用于获取客户 1 的信息
或
- 报告 ID 过滤器
- 对类型(type)的 terms 聚合
- 类型为报告的过滤器
- 一个 top_hits 聚合,用于获取报告
- 对客户 ID 的 terms 聚合
- 一个 top_hits 聚合,用于检索每个客户的信息
Top hits 聚合的 size
不要忘记在您的 top_hits 聚合中提供 size,
否则您只会得到前 3 条结果(默认值)。
读了 Elasticsearch 聚合(aggregations)定义的第一行之后,我认为您可能没有完全理解它的工作方式:
The aggregations framework helps provide aggregated data based on a
search query
由于您的查询根本没有任何过滤器,在 hits.hits 对象中返回所有(ALL)已存储的文档是预期结果。
然后您使用 filter 聚合来帮助获取所需的文档,但它们位于返回的 dict 的 aggs 属性中。
如果我理解正确,我建议您让查询尽可能简单,以下是我猜测的查询:
{
"query": {
"filtered": {
"filter": {
"nested": {
"path" : "customers",
"filter": {
"bool": {
"must" : [
"term": {"customer_id" : "1"}
]
}
}
}
}
}
},
"aggregations": {
"tophits_agg": {
"top_hits": {}
}
}
}
我在 ES 5 中映射了以下文档:
{
"appName" : {
"mappings" : {
"market_audit" : {
"properties" : {
"generation_date": {
"type": "date"
},
"customers" : {
"type" : "nested",
"properties" : {
"customer_id" : {
"type" : "integer"
},
[... other properties ...]
}
"customers" 节点中的多个条目可能具有相同的 customer_id,我试图仅检索具有特定 customer_id(即“1”)的条目与顶级文件的"generation_date"(只处理最新的文件)。
我能够提出以下查询:
{
"query": {},
"sort": [
{ "generation_date": "desc" }
],
"size": 1,
"aggregations": {
"nested": {
"nested": {
"path": "customers"
},
"aggregations": {
"filter": {
"filter": {
"match": {
"customers.customer_id": {
"query": "1"
}
}
},
"aggregations": {
"tophits_agg": {
"top_hits": {}
}
}
}
}
}
}
}
此查询能获取我感兴趣的数据,它位于响应的 "aggregations" 部分(同时返回的 "hits" 数组则包含整个文档)。这里的问题是,我使用的框架(ONGR 的 ElasticSearch bundle 和 DSL bundle,基于 Symfony3)在我每次尝试访问实际数据时都会报错,提示没有可用的桶(bucket)。
我已阅读 ES 文档,但无法提出添加存储桶的有效查询。我确定我遗漏了一些东西,非常欢迎提供一点帮助。如果您对如何适当修改查询有想法,我想我可以想出 PHP 代码来生成它。
编辑:由于这个问题已有一些浏览量但还没有回答(而且我仍然卡住了),我会接受任何能让我从最新生成的文档(根据 "generation_date" 字段)中检索特定 "customer"(通过 customer_id)信息的查询。我给出的查询只是我目前能想到的,我很确定有更好的方法来做到这一点。有什么建议吗?
编辑 2: 这是发送到 ES 的数据:
{
"index": {
"_type": "market_data_audit_document"
}
}
{
"customers": [
{
"customer_id": 1,
"colocation_name": "colo1",
"colocation_id": 26,
"device_name": "device 1",
"channels": [
{
"name": "channel1-5",
"multicast":"1.2.1.5",
"sugar_state":4,
"network_state":1
}
]
},
{
"customer_id":2,
"colocation_name":"colo2",
"colocation_id":27,
"device_name":"device 2",
"channels": [
{
"name":"channel2-5",
"multicast":"1.2.2.5",
"sugar_state":4,
"network_state":1
}
]
},
{
"customer_id":3,
"colocation_name":"colo3",
"colocation_id":28,
"device_name":"device 3",
"channels": [
{
"name":"channel3-5",
"multicast":"1.2.3.5",
"sugar_state":4,
"network_state":1
}
]
},
{
"customer_id":4,
"colocation_name":"colo4",
"colocation_id":29,
"device_name":"device 4"
,"channels": [
{
"name":"channel4-5",
"multicast":"1.2.4.5",
"sugar_state":4,
"network_state":1
}
]
},
{
"customer_id":5,
"colocation_name":"colo5",
"colocation_id":30,
"device_name":"device 5",
"channels": [
{
"name":"channel5-5",
"multicast":"1.2.5.5",
"sugar_state":4,
"network_state":1
}
]
}
],
"generation_date":"2017-02-27T10:55:45+0100"
}
不幸的是,当我尝试发送本帖中列出的查询时,我发现聚合并没有按我预期的方式工作:它返回了 "正确" 的数据,但这些数据来自所有(ALL)已存储的文档!这是一个输出示例:
{
"timed_out" : false,
"took" : 60,
"hits" : {
"total" : 2,
"hits" : [
{
"_source" : {
"customers" : [
{
"colocation_id" : 26,
"channels" : [
{
"name" : "channel1-5",
"sugar_state" : 4,
"network_state" : 1,
"multicast" : "1.2.1.5"
}
],
"customer_id" : 1,
"colocation_name" : "colo1",
"device_name" : "device 1"
},
{
"colocation_id" : 27,
"channels" : [
{
"multicast" : "1.2.2.5",
"network_state" : 1,
"name" : "channel2-5",
"sugar_state" : 4
}
],
"customer_id" : 2,
"device_name" : "device 2",
"colocation_name" : "colo2"
},
{
"device_name" : "device 3",
"colocation_name" : "colo3",
"customer_id" : 3,
"channels" : [
{
"multicast" : "1.2.3.5",
"network_state" : 1,
"sugar_state" : 4,
"name" : "channel3-5"
}
],
"colocation_id" : 28
},
{
"channels" : [
{
"sugar_state" : 4,
"name" : "channel4-5",
"multicast" : "1.2.4.5",
"network_state" : 1
}
],
"customer_id" : 4,
"colocation_id" : 29,
"colocation_name" : "colo4",
"device_name" : "device 4"
},
{
"device_name" : "device 5",
"colocation_name" : "colo5",
"colocation_id" : 30,
"channels" : [
{
"sugar_state" : 4,
"name" : "channel5-5",
"multicast" : "1.2.5.5",
"network_state" : 1
}
],
"customer_id" : 5
}
],
"generation_date" : "2017-02-27T11:45:37+0100"
},
"_type" : "market_data_audit_document",
"sort" : [
1488192337000
],
"_index" : "mars",
"_score" : null,
"_id" : "AVp_LPeJdrvi0cWb8CrL"
}
],
"max_score" : null
},
"aggregations" : {
"nested" : {
"doc_count" : 10,
"filter" : {
"doc_count" : 2,
"tophits_agg" : {
"hits" : {
"max_score" : 1,
"total" : 2,
"hits" : [
{
"_nested" : {
"offset" : 0,
"field" : "customers"
},
"_score" : 1,
"_source" : {
"channels" : [
{
"name" : "channel1-5",
"sugar_state" : 4,
"multicast" : "1.2.1.5",
"network_state" : 1
}
],
"customer_id" : 1,
"colocation_id" : 26,
"colocation_name" : "colo1",
"device_name" : "device 1"
}
},
{
"_source" : {
"colocation_id" : 26,
"customer_id" : 1,
"channels" : [
{
"multicast" : "1.2.1.5",
"network_state" : 1,
"name" : "channel1-5",
"sugar_state" : 4
}
],
"device_name" : "device 1",
"colocation_name" : "colo1"
},
"_nested" : {
"offset" : 0,
"field" : "customers"
},
"_score" : 1
}
]
}
}
}
}
},
"_shards" : {
"total" : 13,
"successful" : 1,
"failures" : [
{
"reason" : {
"index" : ".kibana",
"index_uuid" : "bTkwoysSQ0y8Tt9yYFRStg",
"type" : "query_shard_exception",
"reason" : "No mapping found for [generation_date] in order to sort on"
},
"shard" : 0,
"node" : "4ZUgOm4VRry6EtUK15UH3Q",
"index" : ".kibana"
},
{
"reason" : {
"index_uuid" : "lN2mVF9bRjuDtiBF2qACfA",
"index" : "archiv1_log",
"type" : "query_shard_exception",
"reason" : "No mapping found for [generation_date] in order to sort on"
},
"shard" : 0,
"node" : "4ZUgOm4VRry6EtUK15UH3Q",
"index" : "archiv1_log"
},
{
"index" : "archiv1_session",
"shard" : 0,
"node" : "4ZUgOm4VRry6EtUK15UH3Q",
"reason" : {
"type" : "query_shard_exception",
"index" : "archiv1_session",
"index_uuid" : "cmMAW04YTtCb0khEqHpNyA",
"reason" : "No mapping found for [generation_date] in order to sort on"
}
},
{
"shard" : 0,
"node" : "4ZUgOm4VRry6EtUK15UH3Q",
"reason" : {
"reason" : "No mapping found for [generation_date] in order to sort on",
"index" : "archiv1_users_dev",
"index_uuid" : "AH48gIf5T0CXSQaE7uvVRg",
"type" : "query_shard_exception"
},
"index" : "archiv1_users_dev"
}
],
"failed" : 12
}
}
根据您的描述:
- 你用一堆属性在 elasticsearch 上存储文档
- 每个文档都包含数组中的客户列表(嵌套文档)
- 您只想提取与 customer.id 相关的嵌套文档
- 您的库无法管理没有存储桶的 Elasticsearch 响应
- 您期望 Elasticsearch 返回嵌套文档
问题
聚合有 2 种类型:
- 桶
- 指标
在您的情况下,您在 Nested Agg 下有 2 个聚合:Filter 和 Metric。 过滤器:
- Filter 聚合定义了一个包含所有匹配文档的单一桶,但不会在结果中提供 'bucket' 关键字。
- Top hits 是一个指标(metric)聚合,不提供桶。
解决方法:
我不确定您的 PHP 库能否正确处理嵌套聚合的结果,但您可以使用 Filters 聚合(而不是 Filter 聚合)来获得一个桶列表:
{
"aggregations": {
"nested": {
"nested": {
"path": "customers"
},
"aggregations": {
"filters_customer": {
"filters": {
"filters": [
{
"match": {
"customers.customer_id": "1"
}
}
]
},
"aggregations": {
"top_hits_customer": {
"top_hits": {}
}
}
}
}
}
}
}
将提供如下内容:
{
"aggregations": {
"nested": {
"doc_count": 15,
"filters_customer": {
"buckets": [
{
"doc_count": 3,
"top_hits_customer": {
"hits": {
"total": 3,
"max_score": 1,
"hits": [
{
"_nested": {
"field": "customers",
"offset": 0
},
"_score": 1,
"_source": {
"customer_id": 1,
"foo": "bar"
}
},
{
"_nested": {
"field": "customers",
"offset": 0
},
"_score": 1,
"_source": {
"customer_id": 1,
"foo": "bar"
}
},
{
"_nested": {
"field": "customers",
"offset": 0
},
"_score": 1,
"_source": {
"customer_id": 1,
"foo": "bar"
}
}
]
}
}
}
]
}
}
}
}
关于您的 EDIT 2 的注意事项
Elasticsearch 会在所有文档上执行聚合,而不是只针对按报告日期排序后的 'TOP 1' 文档。按报告拆分结果的一种方法是对报告日期使用 terms 聚合桶:
{
"query": {},
"size": 0,
"aggregations": {
"grp_report": {
"terms": {
"field": "generation_date"
},
"aggregations": {
"nested_customers": {
"nested": {
"path": "customers"
},
"aggregations": {
"filters_customer": {
"filters": {
"filters": [
{
"match": {
"customers.customer_id": "1"
}
}
]
},
"aggregations": {
"top_hits_customer": {
"top_hits": {}
}
}
}
}
}
}
}
}
}
建议:
避免使用复杂的文档,最好将您的报告拆分为带有关联键(例如 reportId)的小文档。这样您无需任何嵌套文档就能轻松地进行过滤和聚合。请在客户文档上添加您要用于过滤的信息,以便对所有类型进行过滤(在这种情况下冗余不是问题)。
用例示例:
- 报告列表
- 根据报告显示客户信息
- 跨多个报告显示客户的历史记录
当前文档示例:/indexName/market_audit
{
"generation_date": "...",
"customers": [
{
"id": 1,
"foo": "bar 1"
},
{
"id": 2,
"foo": "bar 2"
},
{
"id": 3,
"foo": "bar 3"
}
]
}
修改后的文档:
/indexName/market_audit_report
{
"report_id" : "123456"
"generation_date": "...",
"foo":"bar"
}
/indexName/market_audit_customer(客户文档)
{
"report_id" : "123456"
"customer_id": 1,
"foo": "bar 1"
}
{
"report_id" : "123456"
"customer_id": 2,
"foo": "bar 2"
}
{
"report_id" : "123456"
"customer_id": 3,
"foo": "bar 3"
}
如果您知道您的报告 ID,您将能够在一个请求中获取所有数据:
- 报告 ID 过滤器
- 对类型(type)的 terms 聚合
- 类型为报告的过滤器
- 一个 top_hits 聚合,用于获取报告
- Filter 聚合,只获取类型为客户且客户 ID 为 1 的文档
- 一个 top_hits 聚合,用于获取客户 1 的信息
- 类型为报告的过滤器
或
- 报告 ID 过滤器
- 对类型(type)的 terms 聚合
- 类型为报告的过滤器
- 一个 top_hits 聚合,用于获取报告
- 对客户 ID 的 terms 聚合
- 一个 top_hits 聚合,用于检索每个客户的信息
- 类型为报告的过滤器
Top hits 聚合的 size
不要忘记在您的 top_hits 聚合中提供 size,
否则您只会得到前 3 条结果(默认值)。
读了 Elasticsearch 聚合(aggregations)定义的第一行之后,我认为您可能没有完全理解它的工作方式:
The aggregations framework helps provide aggregated data based on a search query
由于您的查询根本没有任何过滤器,在 hits.hits 对象中返回所有(ALL)已存储的文档是预期结果。
然后您使用 filter 聚合来帮助获取所需的文档,但它们位于返回的 dict 的 aggs 属性中。
如果我理解正确,我建议您让查询尽可能简单,以下是我猜测的查询:
{
"query": {
"filtered": {
"filter": {
"nested": {
"path" : "customers",
"filter": {
"bool": {
"must" : [
"term": {"customer_id" : "1"}
]
}
}
}
}
}
},
"aggregations": {
"tophits_agg": {
"top_hits": {}
}
}
}