Elasticsearch 查询:只返回数组中特定类型的词项(terms)

Elastic Search query return terms within array of a specific type

我有一个索引映射如下:

{"tagged_index":{"mappings":{"tagged":{"properties":{"tags":{"properties":{"resources":{"properties":{"tagName":{"type":"string"},"type":{"type":"string"}}}}},"content":{"type":"string"}}}}}}

其中 resources 是一个可以包含多个标签的数组。例如:

{"_id":"82906194","_source":{"tags":{"resources":[{"type":"Person","tagName":"Kim_Kardashian"},{"type":"Person","tagName":"Kanye_West"},{"type":"City","tagName":"New_York"},...]},"content":" Popular NEWS ..."}} , {"_id":"82906195","_source":{"tags":{"resources":[{"type":"City","tagName":"London"},{"type":"Country","tagName":"USA"},{"type":"Music","tagName":"Hello"},...]},"content":" Adele's Hello..."}}, ...

我知道如何用下面的查询提取重要的词项 [tagName],但我并不需要所有类型的词项 [tagName]。怎样才能只提取某一种类型(例如城市 [type:City])的词项?(我想得到类型为 City 的 tagName 列表,即 London、New_York、Berlin……)

{"size":0,"query":{"filtered":{"query":{"query_string":{"query":"*","analyze_wildcard":true}}}},"aggs":{"Cities":{"terms":{"field":"tags.resources.tagName","size":10,"order":{"_count":"desc"}}}}}

所需的输出如下所示:

{"took":1200,"timed_out":false,"_shards":{"total":5,"successful":5,"failed":0},"hits":{"total":5179261,"max_score":0.0,"hits":[]},"aggregations":{"Cities":{"doc_count_error_upper_bound":46737,"sum_other_doc_count":36037440,"buckets":[{"key":"London","doc_count":332820},{"key":"New_York","doc_count":211274},{"key":"Berlin","doc_count":156954},{"key":"Amsterdam","doc_count":132173},...

你能试试这个吗:

{
"_source" : ["tags.resources.tagName"],
 "query": {
    "term": {
       "tags.resources.type": {
          "value": "City"
        }
     }
  }
} 

上面的查询会取回含有 City 类型资源的文档,前提是 resources 字段为 object 类型。注意:term 查询不会对输入做分析,而默认的 string 字段在索引时会被分析并转为小写,因此如果该查询没有命中,可能需要把值写成小写的 "city"。

编辑后

问题是要按属于 city 类型的 tagName 进行分组。使用您当前的映射无法实现这一点:object 类型的对象数组在索引时会被扁平化,同一个对象内 type 与 tagName 之间的对应关系会丢失。您必须将 resources 字段改为 nested 类型。

映射应如下所示:

 "mappings": {
     "resource": {
        "properties": {
           "tags": {
              "properties": {
                 "content": {
                    "type": "string"
                 },
                 "resources": {
                    "type": "nested",
                    "properties": {
                       "tagName": {
                          "type": "string"
                       },
                       "type": {
                          "type": "string"
                       }
                    }
                 }
              }
           }
        }
     }
  }

最终查询将是:

{
 "size": 0,
 "query": {
  "nested": {
     "path": "tags.resources",
     "query": {
        "match": {
           "tags.resources.type": "city"
         }
      }
    }
  },
  "aggs": {
  "resources Nested path": {
     "nested": {
        "path": "tags.resources"
     },
     "aggs": {
        "city type": {
           "filter": {
              "term": {
                 "tags.resources.type": "city"
              }
           },
           "aggs": {
              "group By tagName": {
                 "terms": {
                    "field": "tags.resources.tagName"
                 }
              }
           }
         }
       }
     }
   }
 }  

输出将是:

"aggregations": {
  "resources Nested path": {
     "doc_count": 6,
     "city type": {
        "doc_count": 2,
        "group By tagName": {
           "doc_count_error_upper_bound": 0,
           "sum_other_doc_count": 0,
           "buckets": [
              {
                 "key": "london",
                 "doc_count": 1
              },
              {
                 "key": "new_york",
                 "doc_count": 1
              }
           ]
         }
       }
     } 
   }