按相关性排序的性能问题

Sorting by relevance performance issues

Mongoose(和/或 MongoDB)中是否有函数/方法,可以根据相关性(即匹配到的查询参数数量,越多越靠前)对查询结果进行排序?

下面的示例就是我目前的做法(实际查询使用 $in: [],除此之外完全相同)。我的集合非常小,因此性能还不错;但在较大的集合上,速度会显著下降。

或者,如果有性能更好的方法(即使是在 Mongoose/MongoDB 之外),我也很乐意了解。

示例:

// Sample documents; the trailing comments show the relevance each one is
// expected to receive for the query below.
var docs = [
    {
        fruits: ['apple', 'orange', 'tomato'],
        colors: ['blue', 'green'],
        // relevance: 3
    },
    {
        fruits: ['apple', 'carrot'],
        colors: ['red', 'green'],
        // relevance: 2
    }
];

// Values being searched for, keyed by document field name.
var query = {fruits: ['apple', 'orange'], colors: ['green']};

/**
 * Counts how many of a document's values match the search criteria.
 *
 * For every field named in `criteria`, each element of `doc[field]` that also
 * appears in `criteria[field]` adds one point to the score.
 *
 * @param {Object} doc - Document whose fields hold arrays of values.
 * @param {Object} criteria - Map of field name to the array of wanted values.
 * @returns {number} Total number of matching values across all fields.
 */
function computeRelevance(doc, criteria) {
    return Object.keys(criteria).reduce(function (total, field) {
        var values = doc[field];
        var wanted = criteria[field];

        // A document that lacks the field (or holds a non-array there) simply
        // scores nothing for it instead of throwing on `undefined.filter`.
        if (!Array.isArray(values) || !Array.isArray(wanted)) {
            return total;
        }

        var matches = values.filter(function (value) {
            return wanted.indexOf(value) !== -1;
        });
        return total + matches.length;
    }, 0);
}

// Annotate every document in place with its relevance score.
docs.forEach(function (doc) {
    doc.relevance = computeRelevance(doc, query);
});

结果:

// Contents of `docs` after the relevance loop has run: every document now
// carries a numeric `relevance` property.
var docs = [
    {
        fruits: ['apple', 'orange', 'tomato'],
        colors: ['blue', 'green'],
        relevance: 3
    },
    {
        fruits: ['apple', 'carrot'],
        colors: ['red', 'green'],
        relevance: 2
    }
]

你可以通过聚合来做到这一点:

// Select candidate documents, score each one by how many of the queried
// values it contains, and return the highest-scoring documents first.
db.getCollection('docs').aggregate([
// Keep only documents with at least one queried fruit and one queried color.
{$match: {fruits: {$in: ['apple', 'orange']}, colors: {$in: ['green']}}},
{$project: {
    // One point for each queried value that is present in the document.
    relevance: {
        $sum: [
          {$cond: {if: { "$setIsSubset": [['orange'], "$fruits" ]}, then: 1, else: 0}},
          {$cond: {if: { "$setIsSubset": [['apple'], "$fruits" ]}, then: 1, else: 0}},
          {$cond: {if: { "$setIsSubset": [['green'], "$colors" ]}, then: 1, else: 0}}]
    },
    // Carry the complete original document alongside its score.
    doc: '$$ROOT'}},
// Order by the computed score, most relevant first.
{$sort: {relevance: -1}}
])

结果:

/* 1 */
{
    "_id" : ObjectId("57be8a9b65d2835e960df543"),
    "relevance" : 3,
    "doc" : {
        "_id" : ObjectId("57be8a9b65d2835e960df543"),
        "fruits" : [ 
            "apple", 
            "orange", 
            "tomato"
        ],
        "colors" : [ 
            "blue", 
            "green"
        ]
    }
}

/* 2 */
{
    "_id" : ObjectId("57be8aa865d2835e960df544"),
    "relevance" : 2,
    "doc" : {
        "_id" : ObjectId("57be8aa865d2835e960df544"),
        "fruits" : [ 
            "apple", 
            "carrot"
        ],
        "colors" : [ 
            "red", 
            "green"
        ]
    }
}