按相关性排序的性能问题
Sorting by relevance performance issues
Mongoose(和/或 MongoDB)中是否有函数/方法,可以根据相关性(即匹配到的查询参数数量,越多越靠前)对查询结果进行排序?
下面的示例是我目前使用的做法(实际查询使用 $in: [],其余部分相同)——我的集合非常小,因此性能尚可,但在较大的集合上,速度会明显下降。
或者,如果有性能更好的方法(即使不使用 Mongoose/MongoDB),我也很乐意了解。
示例:
// Sample documents. The inline comments state the score each document is
// expected to receive from the scoring pass below.
var docs = [
  {
    fruits: ['apple', 'orange', 'tomato'],
    colors: ['blue', 'green']
    // relevance: 3
  },
  {
    fruits: ['apple', 'carrot'],
    colors: ['red', 'green']
    // relevance: 2
  }
];

// Requested values, keyed by the document field they are compared against.
var query = { fruits: ['apple', 'orange'], colors: ['green'] };

// Scoring pass: for every queried field, count the document's values that
// also appear in the query's list for that field, then store the total on
// the document as `relevance`.
docs.forEach(function (entry) {
  entry.relevance = Object.keys(query).reduce(function (score, field) {
    var wanted = query[field];
    var matched = entry[field].filter(function (value) {
      return wanted.indexOf(value) !== -1;
    });
    return score + matched.length;
  }, 0);
});
结果:
// Contents of `docs` once the scoring loop has run: each entry now carries
// a numeric `relevance` property.
var docs = [
{
fruits: ['apple', 'orange', 'tomato'],
colors: ['blue', 'green'],
relevance: 3
},
{
fruits: ['apple', 'carrot'],
colors: ['red', 'green'],
relevance: 2
}
]
你可以通过聚合来做到这一点:
// Score each matching document by how many of the requested values it
// contains, and return the documents ordered from most to least relevant.
db.getCollection('docs').aggregate([
  // Keep only documents that have at least one requested fruit and at least
  // one requested color.
  {$match: {fruits: {$in: ['apple', 'orange']}, colors: {$in: ['green']}}},
  {$project: {
    // One point for every requested value present in the corresponding array.
    relevance: {
      $sum: [
        {$cond: {if: {"$setIsSubset": [['orange'], "$fruits"]}, then: 1, else: 0}},
        {$cond: {if: {"$setIsSubset": [['apple'], "$fruits"]}, then: 1, else: 0}},
        {$cond: {if: {"$setIsSubset": [['green'], "$colors"]}, then: 1, else: 0}}
      ]
    },
    // Carry the original document along under `doc`.
    doc: '$$ROOT'
  }},
  // Highest score first; without this stage the output order is not tied to
  // the computed relevance.
  {$sort: {relevance: -1}}
])
结果:
/* 1 */
{
"_id" : ObjectId("57be8a9b65d2835e960df543"),
"relevance" : 3,
"doc" : {
"_id" : ObjectId("57be8a9b65d2835e960df543"),
"fruits" : [
"apple",
"orange",
"tomato"
],
"colors" : [
"blue",
"green"
]
}
}
/* 2 */
{
"_id" : ObjectId("57be8aa865d2835e960df544"),
"relevance" : 2,
"doc" : {
"_id" : ObjectId("57be8aa865d2835e960df544"),
"fruits" : [
"apple",
"carrot"
],
"colors" : [
"red",
"green"
]
}
}
Mongoose(和/或 MongoDB)中是否有函数/方法,可以根据相关性(即匹配到的查询参数数量,越多越靠前)对查询结果进行排序?
下面的示例是我目前使用的做法(实际查询使用 $in: [],其余部分相同)——我的集合非常小,因此性能尚可,但在较大的集合上,速度会明显下降。
或者,如果有性能更好的方法(即使不使用 Mongoose/MongoDB),我也很乐意了解。
示例:
// Sample documents. The inline comments state the score each document is
// expected to receive from the scoring pass below.
var docs = [
  {
    fruits: ['apple', 'orange', 'tomato'],
    colors: ['blue', 'green']
    // relevance: 3
  },
  {
    fruits: ['apple', 'carrot'],
    colors: ['red', 'green']
    // relevance: 2
  }
];

// Requested values, keyed by the document field they are compared against.
var query = { fruits: ['apple', 'orange'], colors: ['green'] };

// Scoring pass: for every queried field, count the document's values that
// also appear in the query's list for that field, then store the total on
// the document as `relevance`.
docs.forEach(function (entry) {
  entry.relevance = Object.keys(query).reduce(function (score, field) {
    var wanted = query[field];
    var matched = entry[field].filter(function (value) {
      return wanted.indexOf(value) !== -1;
    });
    return score + matched.length;
  }, 0);
});
结果:
// Contents of `docs` once the scoring loop has run: each entry now carries
// a numeric `relevance` property.
var docs = [
{
fruits: ['apple', 'orange', 'tomato'],
colors: ['blue', 'green'],
relevance: 3
},
{
fruits: ['apple', 'carrot'],
colors: ['red', 'green'],
relevance: 2
}
]
你可以通过聚合来做到这一点:
// Score each matching document by how many of the requested values it
// contains, and return the documents ordered from most to least relevant.
db.getCollection('docs').aggregate([
  // Keep only documents that have at least one requested fruit and at least
  // one requested color.
  {$match: {fruits: {$in: ['apple', 'orange']}, colors: {$in: ['green']}}},
  {$project: {
    // One point for every requested value present in the corresponding array.
    relevance: {
      $sum: [
        {$cond: {if: {"$setIsSubset": [['orange'], "$fruits"]}, then: 1, else: 0}},
        {$cond: {if: {"$setIsSubset": [['apple'], "$fruits"]}, then: 1, else: 0}},
        {$cond: {if: {"$setIsSubset": [['green'], "$colors"]}, then: 1, else: 0}}
      ]
    },
    // Carry the original document along under `doc`.
    doc: '$$ROOT'
  }},
  // Highest score first; without this stage the output order is not tied to
  // the computed relevance.
  {$sort: {relevance: -1}}
])
结果:
/* 1 */
{
"_id" : ObjectId("57be8a9b65d2835e960df543"),
"relevance" : 3,
"doc" : {
"_id" : ObjectId("57be8a9b65d2835e960df543"),
"fruits" : [
"apple",
"orange",
"tomato"
],
"colors" : [
"blue",
"green"
]
}
}
/* 2 */
{
"_id" : ObjectId("57be8aa865d2835e960df544"),
"relevance" : 2,
"doc" : {
"_id" : ObjectId("57be8aa865d2835e960df544"),
"fruits" : [
"apple",
"carrot"
],
"colors" : [
"red",
"green"
]
}
}