匹配 query_string 的文档的评分
score for matching query_string documents
我目前正在处理一个非常烦人的查询,需要用 ES(Elasticsearch)来实现。
我的文档是嵌套文档,它们的索引看起来像这样:
"mydocs" : {
"properties" : {
"doc" : {
"type" : "nested",
"properties" : {
"name" : {"type" : "string", "store" : "yes", "index" : "analyzed"},
"tagln" : {"type" : "string", "store" : "yes", "index" : "analyzed"},
"tags" : {"type" : "string", "store" : "yes", "index" : "analyzed"},
"featured" : {"type" : "integer", "store" : "yes", "index" : "not_analyzed"}
"blkd" : {"type" : "integer", "store" : "yes", "index" : "not_analyzed"},
... etc ...
}
我正在尝试通过一种特殊的评分算法来提升 name、tagln 和 tags 字段,该算法计算 featured*10000 + [在 name 中找到]*1000 + [在 tagln 中找到]*10 + [在 tags 中找到]*10。我的查询如下:
{
"from" : 0,
"size" : 10,
"query" : {
"nested" : {
"query" : {
"filtered" : {
"query" : {
"bool" : {
"must" : [ {
"term" : {
"doc.blkd" : 0
}
} ],
"should" : [ {
"function_score" : {
"functions" : [ {
"field_value_factor" : {
"field" : "doc.featured",
"factor" : 10000.0
}
} ],
"score_mode" : "sum",
"boost_mode" : "sum"
}
}, {
"constant_score" : {
"filter" : {
"query_string" : {
"query" : "featured*",
"fields" : [ "doc.name^1000.0" ]
}
},
"boost" : 1000.0
}
}, {
"constant_score" : {
"filter" : {
"query_string" : {
"query" : "featured*",
"fields" : [ "doc.tags^10.0" ],
"boost" : 10.0
}
}
}
}, {
"constant_score" : {
"filter" : {
"query_string" : {
"query" : "featured*",
"fields" : [ "doc.tagln^10.0" ],
"boost" : 10.0
}
}
}
} ],
"minimum_should_match" : "0"
}
}
}
},
"path" : "doc",
"score_mode" : "sum"
}
},
"explain" : false,
"sort" : [ {
"_score" : { }
} ]
}
分数没有体现应有的提升:featured 字段的得分按预期工作,但 query_string 中的 boost 不起作用,
name 中带有 "aaa" 的文档只得到 5 或 0 这样的小分数,而 featured=1 的文档返回的分数为 4000/6000/7500 等。
首先,分数不是 10000+ 就很奇怪(可能是受多个评分因素影响),而且 name 中匹配的查询字符串对分数没有任何明显的影响。
我怎样才能解决这个问题或至少更好地调试它(看看分数是如何建立的)?
我尝试将 explain 改为 true,但得到的只是下面这个非常无用(或者说对我而言难以读懂)的解释:
"_explanation": {
"value": 4000.0024,
"description": "sum of:",
"details": [
{
"value": 4000.0024,
"description": "Score based on child doc range from 387 to 387",
"details": []
},
{
"value": 0,
"description": "match on required clause, product of:",
"details": [
{
"value": 0,
"description": "# clause",
"details": []
},
{
"value": 0.0009999962,
"description": "-ConstantScore(_type:.percolator) #(+*:* -_type:__*), product of:",
"details": [
{
"value": 1,
"description": "boost",
"details": []
},
{
"value": 0.0009999962,
"description": "queryNorm",
"details": []
}
]
}
]
}
]
}
* 已编辑 *
感谢 keety,我现在能够提供更多信息:
在添加 disable_coord: true 和 inner_hits 的 explain: true 之后,
我已经尽我所能尝试了各种 "boosting" 和 query_string 的组合。查询如下:
{
"from" : 0,
"size" : 10,
"query" : {
"nested" : {
"query" : {
"filtered" : {
"query" : {
"bool" : {
"must" : [ {
"term" : {
"doc.blkd" : 0
}
} ],
"should" : [ {
"function_score" : {
"functions" : [ {
"field_value_factor" : {
"field" : "doc.featured",
"factor" : 10000.0
}
} ],
"score_mode" : "sum",
"boost_mode" : "sum"
}
}, {
"constant_score" : {
"filter" : {
"query_string" : {
"query" : "*featured*",
"fields" : [ "doc.name^1000.0" ]
}
},
"boost" : 1000.0
}
}, {
"query_string" : {
"query" : "*featured*",
"fields" : [ "doc.tags^100.0" ],
"boost" : 100.0
}
}, {
"constant_score" : {
"filter" : {
"query_string" : {
"query" : "*featured*",
"fields" : [ "doc.tagln^10.0" ],
"boost" : 10.0
}
}
}
} ],
"disable_coord" : true,
"minimum_should_match" : "0"
}
},
"filter" : {
"bool" : {
"should" : [ {
"query_string" : {
"query" : "*featured*",
"fields" : [ "doc.name^1000000.0", "doc.tags^10.0", "doc.tagln^10.0" ],
"boost" : 1000.0
}
} ],
"minimum_should_match" : "0"
}
}
}
},
"path" : "doc",
"score_mode" : "sum",
"inner_hits" : {
"explain" : "true"
}
}
},
"explain" : false,
"sort" : [ {
"_score" : { }
} ]
}
如您所见,我已将 query_string 添加到过滤器中,并将其中一个查询改为不使用 constant_score。
文档的解释现在看起来像这样:
"max_score": 10001,
"hits": [
{
"_index": "myindex",
"_type": "mydocs",
"_id": "1111",
"_score": 10001,
"_ttl": 86158563,
"_source": {
"meta": {
"id": "1111",
"rev": "35-14602ccf5c3d429e0000000002000000",
"expiration": 0,
"flags": 33554432
},
"doc": {
"featured": 1,
"tagln": "hello location 1",
"blkd": 0,
"tags": [
"UsLocTaglinefeat"
],
"name": "hello US location featured"
}
},
"inner_hits": {
"doc": {
"hits": {
"total": 1,
"max_score": 10001,
"hits": [
{
"_shard": 1,
"_node": "YIXx2rrKR2O5q9519FIr_Q",
"_index": "myindex",
"_type": "mydocs",
"_id": "1111",
"_nested": {
"field": "doc",
"offset": 0
},
"_score": 10001,
"_source": {
"featured": 1,
"tagln": "hello location 1",
"blkd": 0,
"tags": [
"UsLocTaglinefeat"
],
"name": "hello US location featured"
},
"_explanation": {
"value": 10001,
"description": "sum of:",
"details": [
{
"value": 10001,
"description": "sum of:",
"details": [
{
"value": 0.0041682906,
"description": "weight(doc.blkd:`\b\u0000\u0000\u0000\u0000 in 0) [PerFieldSimilarity], result of:",
"details": [
{
"value": 0.0041682906,
"description": "score(doc=0,freq=1.0), product of:",
"details": [
{
"value": 0.0020365636,
"description": "queryWeight, product of:",
"details": [
{
"value": 2.0467274,
"description": "idf(docFreq=177, maxDocs=507)",
"details": []
},
{
"value": 0.0009950341,
"description": "queryNorm",
"details": []
}
]
},
{
"value": 2.0467274,
"description": "fieldWeight in 0, product of:",
"details": [
{
"value": 1,
"description": "tf(freq=1.0), with freq of:",
"details": [
{
"value": 1,
"description": "termFreq=1.0",
"details": []
}
]
},
{
"value": 2.0467274,
"description": "idf(docFreq=177, maxDocs=507)",
"details": []
},
{
"value": 1,
"description": "fieldNorm(doc=0)",
"details": []
}
]
}
]
}
]
},
{
"value": 10000.001,
"description": "sum of",
"details": [
{
"value": 0.0009950341,
"description": "*:*, product of:",
"details": [
{
"value": 1,
"description": "boost",
"details": []
},
{
"value": 0.0009950341,
"description": "queryNorm",
"details": []
}
]
},
{
"value": 10000,
"description": "min of:",
"details": [
{
"value": 10000,
"description": "field value function: none(doc['doc.featured'].value * factor=10000.0)",
"details": []
},
{
"value": 3.4028235e+38,
"description": "maxBoost",
"details": []
}
]
}
]
},
{
"value": 0.9950341,
"description": "ConstantScore(doc.name:*featured*), product of:",
"details": [
{
"value": 1000,
"description": "boost",
"details": []
},
{
"value": 0.0009950341,
"description": "queryNorm",
"details": []
}
]
}
]
},
{
"value": 0,
"description": "match on required clause, product of:",
"details": [
{
"value": 0,
"description": "# clause",
"details": []
},
{
"value": 0.0009950341,
"description": "((doc.name:*featured*)^1000000.0 | (doc.tags:*featured*)^10.0 | (doc.tagln:*featured*)^10.0), product of:",
"details": [
{
"value": 1,
"description": "boost",
"details": []
},
{
"value": 0.0009950341,
"description": "queryNorm",
"details": []
}
]
}
]
}
]
}
}
]
}
}
}
},
似乎唯一能以某种方式影响分数的 query_string 是过滤器内部的那个,但我似乎无法提高它的分数……
欢迎任何提示:)谢谢
对于 OP 中的查询,您需要在 bool 查询中启用 disable_coord 以获得所需的行为。
同时启用 inner_hits 并在其中设置 explain:true
将提供嵌套文档的评分详细信息。此功能在 elasticsearch 1.5 及更高版本中可用。
示例:
{
"query": {
"nested": {
"query": {
"filtered": {
"query": {
"bool": {
"disable_coord": "true",
"must": [
{
"term": {
"doc.blkd": 0
}
}
],
"should": [
{
"function_score": {
"functions": [
{
"field_value_factor": {
"field": "doc.featured",
"factor": 10000
}
}
],
"score_mode": "sum",
"boost_mode": "sum"
}
},
{
"constant_score": {
"filter": {
"query_string": {
"query": "featured*",
"fields": [
"doc.name^1000.0"
]
}
},
"boost": 1000
}
},
{
"constant_score": {
"filter": {
"query_string": {
"query": "featured*",
"fields": [
"doc.tags^10.0"
],
"boost": 10
}
}
}
},
{
"constant_score": {
"filter": {
"query_string": {
"query": "featured*",
"fields": [
"doc.tagln^10.0"
],
"boost": 10
}
}
}
}
],
"minimum_should_match": "0"
}
}
}
},
"path": "doc",
"score_mode": "sum",
"inner_hits" : {
"explain" : "true"
}
}
}
}
已编辑
此外,使用 function_score 重写查询可能会更简单,如下例所示。
{
"query": {
"nested": {
"query": {
"function_score": {
"functions": [
{
"field_value_factor": {
"field": "doc.featured",
"factor": 10000
}
},
{
"filter": {
"query_string": {
"query": "*featured*",
"fields": [
"doc.name^1000.0"
]
}
},
"weight": 1000
},
{
"filter": {
"query_string": {
"query": "*featured*",
"fields": [
"doc.tags^1000.0"
]
}
},
"weight": 100
},
{
"weight": 10,
"filter": {
"query_string": {
"query": "*featured*",
"fields": [
"doc.tagln^10.0"
]
}
}
}
],
"query": {
"term": {
"doc.blkd": 0
}
},
"score_mode": "sum",
"boost_mode": "sum"
}
},
"path": "doc",
"score_mode": "sum",
"inner_hits": {
"explain": "true"
}
}
}
}
"score_mode" : "sum",
"boost_mode" : "sum"
是我的问题所在。ES 正在对整个分数进行归一化,但结果很奇怪。
感谢 keety 对 inner_hits 的讲解,它对我帮助很大!
我目前正在处理一个非常烦人的查询,需要用 ES(Elasticsearch)来实现。 我的文档是嵌套文档,它们的索引看起来像这样:
"mydocs" : {
"properties" : {
"doc" : {
"type" : "nested",
"properties" : {
"name" : {"type" : "string", "store" : "yes", "index" : "analyzed"},
"tagln" : {"type" : "string", "store" : "yes", "index" : "analyzed"},
"tags" : {"type" : "string", "store" : "yes", "index" : "analyzed"},
"featured" : {"type" : "integer", "store" : "yes", "index" : "not_analyzed"}
"blkd" : {"type" : "integer", "store" : "yes", "index" : "not_analyzed"},
... etc ...
}
我正在尝试通过一种特殊的评分算法来提升 name、tagln 和 tags 字段,该算法计算 featured*10000 + [在 name 中找到]*1000 + [在 tagln 中找到]*10 + [在 tags 中找到]*10。我的查询如下:
{
"from" : 0,
"size" : 10,
"query" : {
"nested" : {
"query" : {
"filtered" : {
"query" : {
"bool" : {
"must" : [ {
"term" : {
"doc.blkd" : 0
}
} ],
"should" : [ {
"function_score" : {
"functions" : [ {
"field_value_factor" : {
"field" : "doc.featured",
"factor" : 10000.0
}
} ],
"score_mode" : "sum",
"boost_mode" : "sum"
}
}, {
"constant_score" : {
"filter" : {
"query_string" : {
"query" : "featured*",
"fields" : [ "doc.name^1000.0" ]
}
},
"boost" : 1000.0
}
}, {
"constant_score" : {
"filter" : {
"query_string" : {
"query" : "featured*",
"fields" : [ "doc.tags^10.0" ],
"boost" : 10.0
}
}
}
}, {
"constant_score" : {
"filter" : {
"query_string" : {
"query" : "featured*",
"fields" : [ "doc.tagln^10.0" ],
"boost" : 10.0
}
}
}
} ],
"minimum_should_match" : "0"
}
}
}
},
"path" : "doc",
"score_mode" : "sum"
}
},
"explain" : false,
"sort" : [ {
"_score" : { }
} ]
}
分数没有体现应有的提升:featured 字段的得分按预期工作,但 query_string 中的 boost 不起作用, name 中带有 "aaa" 的文档只得到 5 或 0 这样的小分数,而 featured=1 的文档返回的分数为 4000/6000/7500 等。
首先,分数不是 10000+ 就很奇怪(可能是受多个评分因素影响),而且 name 中匹配的查询字符串对分数没有任何明显的影响。
我怎样才能解决这个问题或至少更好地调试它(看看分数是如何建立的)? 我尝试将 explain 改为 true,但得到的只是下面这个非常无用(或者说对我而言难以读懂)的解释:
"_explanation": {
"value": 4000.0024,
"description": "sum of:",
"details": [
{
"value": 4000.0024,
"description": "Score based on child doc range from 387 to 387",
"details": []
},
{
"value": 0,
"description": "match on required clause, product of:",
"details": [
{
"value": 0,
"description": "# clause",
"details": []
},
{
"value": 0.0009999962,
"description": "-ConstantScore(_type:.percolator) #(+*:* -_type:__*), product of:",
"details": [
{
"value": 1,
"description": "boost",
"details": []
},
{
"value": 0.0009999962,
"description": "queryNorm",
"details": []
}
]
}
]
}
]
}
* 已编辑 *
感谢 keety,我现在能够提供更多信息: 在添加 disable_coord: true 和 inner_hits 的 explain: true 之后, 我已经尽我所能尝试了各种 "boosting" 和 query_string 的组合。查询如下:
{
"from" : 0,
"size" : 10,
"query" : {
"nested" : {
"query" : {
"filtered" : {
"query" : {
"bool" : {
"must" : [ {
"term" : {
"doc.blkd" : 0
}
} ],
"should" : [ {
"function_score" : {
"functions" : [ {
"field_value_factor" : {
"field" : "doc.featured",
"factor" : 10000.0
}
} ],
"score_mode" : "sum",
"boost_mode" : "sum"
}
}, {
"constant_score" : {
"filter" : {
"query_string" : {
"query" : "*featured*",
"fields" : [ "doc.name^1000.0" ]
}
},
"boost" : 1000.0
}
}, {
"query_string" : {
"query" : "*featured*",
"fields" : [ "doc.tags^100.0" ],
"boost" : 100.0
}
}, {
"constant_score" : {
"filter" : {
"query_string" : {
"query" : "*featured*",
"fields" : [ "doc.tagln^10.0" ],
"boost" : 10.0
}
}
}
} ],
"disable_coord" : true,
"minimum_should_match" : "0"
}
},
"filter" : {
"bool" : {
"should" : [ {
"query_string" : {
"query" : "*featured*",
"fields" : [ "doc.name^1000000.0", "doc.tags^10.0", "doc.tagln^10.0" ],
"boost" : 1000.0
}
} ],
"minimum_should_match" : "0"
}
}
}
},
"path" : "doc",
"score_mode" : "sum",
"inner_hits" : {
"explain" : "true"
}
}
},
"explain" : false,
"sort" : [ {
"_score" : { }
} ]
}
如您所见,我已将 query_string 添加到过滤器中,并将其中一个查询改为不使用 constant_score。
文档的解释现在看起来像这样:
"max_score": 10001,
"hits": [
{
"_index": "myindex",
"_type": "mydocs",
"_id": "1111",
"_score": 10001,
"_ttl": 86158563,
"_source": {
"meta": {
"id": "1111",
"rev": "35-14602ccf5c3d429e0000000002000000",
"expiration": 0,
"flags": 33554432
},
"doc": {
"featured": 1,
"tagln": "hello location 1",
"blkd": 0,
"tags": [
"UsLocTaglinefeat"
],
"name": "hello US location featured"
}
},
"inner_hits": {
"doc": {
"hits": {
"total": 1,
"max_score": 10001,
"hits": [
{
"_shard": 1,
"_node": "YIXx2rrKR2O5q9519FIr_Q",
"_index": "myindex",
"_type": "mydocs",
"_id": "1111",
"_nested": {
"field": "doc",
"offset": 0
},
"_score": 10001,
"_source": {
"featured": 1,
"tagln": "hello location 1",
"blkd": 0,
"tags": [
"UsLocTaglinefeat"
],
"name": "hello US location featured"
},
"_explanation": {
"value": 10001,
"description": "sum of:",
"details": [
{
"value": 10001,
"description": "sum of:",
"details": [
{
"value": 0.0041682906,
"description": "weight(doc.blkd:`\b\u0000\u0000\u0000\u0000 in 0) [PerFieldSimilarity], result of:",
"details": [
{
"value": 0.0041682906,
"description": "score(doc=0,freq=1.0), product of:",
"details": [
{
"value": 0.0020365636,
"description": "queryWeight, product of:",
"details": [
{
"value": 2.0467274,
"description": "idf(docFreq=177, maxDocs=507)",
"details": []
},
{
"value": 0.0009950341,
"description": "queryNorm",
"details": []
}
]
},
{
"value": 2.0467274,
"description": "fieldWeight in 0, product of:",
"details": [
{
"value": 1,
"description": "tf(freq=1.0), with freq of:",
"details": [
{
"value": 1,
"description": "termFreq=1.0",
"details": []
}
]
},
{
"value": 2.0467274,
"description": "idf(docFreq=177, maxDocs=507)",
"details": []
},
{
"value": 1,
"description": "fieldNorm(doc=0)",
"details": []
}
]
}
]
}
]
},
{
"value": 10000.001,
"description": "sum of",
"details": [
{
"value": 0.0009950341,
"description": "*:*, product of:",
"details": [
{
"value": 1,
"description": "boost",
"details": []
},
{
"value": 0.0009950341,
"description": "queryNorm",
"details": []
}
]
},
{
"value": 10000,
"description": "min of:",
"details": [
{
"value": 10000,
"description": "field value function: none(doc['doc.featured'].value * factor=10000.0)",
"details": []
},
{
"value": 3.4028235e+38,
"description": "maxBoost",
"details": []
}
]
}
]
},
{
"value": 0.9950341,
"description": "ConstantScore(doc.name:*featured*), product of:",
"details": [
{
"value": 1000,
"description": "boost",
"details": []
},
{
"value": 0.0009950341,
"description": "queryNorm",
"details": []
}
]
}
]
},
{
"value": 0,
"description": "match on required clause, product of:",
"details": [
{
"value": 0,
"description": "# clause",
"details": []
},
{
"value": 0.0009950341,
"description": "((doc.name:*featured*)^1000000.0 | (doc.tags:*featured*)^10.0 | (doc.tagln:*featured*)^10.0), product of:",
"details": [
{
"value": 1,
"description": "boost",
"details": []
},
{
"value": 0.0009950341,
"description": "queryNorm",
"details": []
}
]
}
]
}
]
}
}
]
}
}
}
},
似乎唯一能以某种方式影响分数的 query_string 是过滤器内部的那个,但我似乎无法提高它的分数…… 欢迎任何提示:)谢谢
对于 OP 中的查询,您需要在 bool 查询中启用 disable_coord 以获得所需的行为。
同时启用 inner_hits 并在其中设置 explain:true
将提供嵌套文档的评分详细信息。此功能在 elasticsearch 1.5 及更高版本中可用。
示例:
{
"query": {
"nested": {
"query": {
"filtered": {
"query": {
"bool": {
"disable_coord": "true",
"must": [
{
"term": {
"doc.blkd": 0
}
}
],
"should": [
{
"function_score": {
"functions": [
{
"field_value_factor": {
"field": "doc.featured",
"factor": 10000
}
}
],
"score_mode": "sum",
"boost_mode": "sum"
}
},
{
"constant_score": {
"filter": {
"query_string": {
"query": "featured*",
"fields": [
"doc.name^1000.0"
]
}
},
"boost": 1000
}
},
{
"constant_score": {
"filter": {
"query_string": {
"query": "featured*",
"fields": [
"doc.tags^10.0"
],
"boost": 10
}
}
}
},
{
"constant_score": {
"filter": {
"query_string": {
"query": "featured*",
"fields": [
"doc.tagln^10.0"
],
"boost": 10
}
}
}
}
],
"minimum_should_match": "0"
}
}
}
},
"path": "doc",
"score_mode": "sum",
"inner_hits" : {
"explain" : "true"
}
}
}
}
已编辑
此外,使用 function_score 重写查询可能会更简单,如下例所示。
{
"query": {
"nested": {
"query": {
"function_score": {
"functions": [
{
"field_value_factor": {
"field": "doc.featured",
"factor": 10000
}
},
{
"filter": {
"query_string": {
"query": "*featured*",
"fields": [
"doc.name^1000.0"
]
}
},
"weight": 1000
},
{
"filter": {
"query_string": {
"query": "*featured*",
"fields": [
"doc.tags^1000.0"
]
}
},
"weight": 100
},
{
"weight": 10,
"filter": {
"query_string": {
"query": "*featured*",
"fields": [
"doc.tagln^10.0"
]
}
}
}
],
"query": {
"term": {
"doc.blkd": 0
}
},
"score_mode": "sum",
"boost_mode": "sum"
}
},
"path": "doc",
"score_mode": "sum",
"inner_hits": {
"explain": "true"
}
}
}
}
"score_mode" : "sum",
"boost_mode" : "sum"
是我的问题所在。ES 正在对整个分数进行归一化,但结果很奇怪。
感谢 keety 对 inner_hits 的讲解,它对我帮助很大!