ElasticSearch

Question

为简单起见，我将借用 Elasticsearch 官方文档中的这个示例，以便更清楚地说明我的问题。我需要的是获取每个用户的最佳评论（即与查询最匹配的那一条）。

在这个示例的基础上，我在 comments 数组的末尾多加了一条评论：

PUT /sales/_doc/1?refresh {
"tags": ["car", "auto"],
"comments": [
    {"username": "baddriver007", "comment": "This car could have better brakes"},
    {"username": "dr_who", "comment": "Where's the autopilot? Can't find it"},
    {"username": "ilovemotorbikes", "comment": "This car has two extra wheels"},
    {"username": "baddriver007", "comment": "This is fast car"}
]}

我还更改了查询，现在它匹配嵌套（评论）数组中的单词 fast。

POST /sales/_search {
"query": {
   "nested": {
     "path": "comments",
     "query": {
       "match": {
         "comments.comment": "fast"
       }
     }
   }
},
"aggs": {
    "by_sale": {
        "nested" : {
            "path" : "comments"
        },
        "aggs": {
            "by_user": {
                "terms": {
                    "field": "comments.username",
                    "size": 1
                },
                "aggs": {
                    "by_nested": {
                        "top_hits":{
                          "explain": true
                        }
                    }
                }
            }
        }
    }
}}

我原本期望响应会对每条评论独立评分，并且包含单词 fast 的评论得分更高，因为我使用的是 top_hits 聚合。但在实际响应中，用户 baddriver007 的两条评论得分完全相同，而且 top_hits 中每个命中的 _explanation 都是 "description": "Not a match"。我遗漏了什么？

{
  "took": 7,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 1,
    "max_score": 1.3551694,
    "hits": [
      {
        "_index": "sales",
        "_type": "_doc",
        "_id": "1",
        "_score": 1.3551694,
        "_source": {
          "tags": [
            "car",
            "auto"
          ],
          "comments": [
            {
              "username": "baddriver007",
              "comment": "This car could have better brakes"
            },
            {
              "username": "dr_who",
              "comment": "Where's the autopilot? Can't find it"
            },
            {
              "username": "ilovemotorbikes",
              "comment": "This car has two extra wheels"
            },
            {
              "username": "baddriver007",
              "comment": "This is fast car"
            }
          ]
        }
      }
    ]
  },
  "aggregations": {
    "by_sale": {
      "doc_count": 4,
      "by_user": {
        "doc_count_error_upper_bound": 0,
        "sum_other_doc_count": 1,
        "buckets": [
          {
            "key": "baddriver007",
            "doc_count": 2,
            "by_nested": {
              "hits": {
                "total": 2,
                "max_score": 1.3551694,
                "hits": [
                  {
                    "_shard": "[sales][3]",
                    "_node": "-22psoQNRLa8_Y9GeHBXaw",
                    "_index": "sales",
                    "_type": "_doc",
                    "_id": "1",
                    "_nested": {
                      "field": "comments",
                      "offset": 3
                    },
                    "_score": 1.3551694,
                    "_source": {
                      "username": "baddriver007",
                      "comment": "This is fast car"
                    },
                    "_explanation": {
                      "value": 0,
                      "description": "Not a match",
                      "details": []
                    }
                  },
                  {
                    "_shard": "[sales][3]",
                    "_node": "-22psoQNRLa8_Y9GeHBXaw",
                    "_index": "sales",
                    "_type": "_doc",
                    "_id": "1",
                    "_nested": {
                      "field": "comments",
                      "offset": 0
                    },
                    "_score": 1.3551694,
                    "_source": {
                      "username": "baddriver007",
                      "comment": "This car could have better brakes"
                    },
                    "_explanation": {
                      "value": 0,
                      "description": "Not a match",
                      "details": []
                    }
                  }
                ]
              }
            }
          },
          {
            "key": "dr_who",
            "doc_count": 1,
            "by_nested": {
              "hits": {
                "total": 1,
                "max_score": 1.3551694,
                "hits": [
                  {
                    "_shard": "[sales][3]",
                    "_node": "-22psoQNRLa8_Y9GeHBXaw",
                    "_index": "sales",
                    "_type": "_doc",
                    "_id": "1",
                    "_nested": {
                      "field": "comments",
                      "offset": 1
                    },
                    "_score": 1.3551694,
                    "_source": {
                      "username": "dr_who",
                      "comment": "Where's the autopilot? Can't find it"
                    },
                    "_explanation": {
                      "value": 0,
                      "description": "Not a match",
                      "details": []
                    }
                  }
                ]
              }
            }
          }
        ]
      }
    }
  }
}

Answer 1

ElasticSearch 很擅长处理这类需求，但您需要以不同的方式组织文档：应当把每条评论单独索引为一个文档（共 N 个文档），而不是使用一个包含 N 条评论的文档。

ElasticSearch - TopHits 对嵌套聚合中的所有命中给出相同的分数

ElasticSearch - TopHits give same score for all hits in nested aggregation

elasticsearch-aggregation