使用来自查询同一集合的任意数量的过滤条件进行查询

Question

// Schema for one score record: a tester (reference to the "user" model),
// a test (reference to the "test" model) and the numeric score obtained.
// Every field is required.
const score_schema = mongoose.Schema(
  {
    tester_id: { type: mongoose.Schema.Types.ObjectId, ref: "user", required: true },
    test_id: { type: mongoose.Schema.Types.ObjectId, ref: "test", required: true },
    score: { type: Number, required: true },
  },
  // Documents live in the "score" collection, with the timestamps option enabled.
  { collection: `score`, timestamps: true }
);

查询 1:

给定一个用户。首先查询 score 模型，找出该用户参加过的所有测试。这将返回任意数量的测试以及每个测试对应的分数。

查询 2：

对 score 模型再进行一次查询，查找满足以下条件的所有文档：其 test_id 与上述查询返回的某个 test_id 相同，并且其分数大于或等于上述查询中该 test_id 对应的分数。

这实质上意味着：查询 1 返回的文档数量是任意的，而每个返回的文档都会成为查询 2 的一个过滤条件。

问题：

能否将上述 2 个查询合并为 1 个查询，从而只需与 MongoDB API 进行一次往返？如果不能，那么使用 2 个单独查询的解决方案也是可以接受的。

// Sample score documents used to illustrate the question.
// Each tuple is [tester_id, test_id, score]; it is expanded into an object
// with the same three keys as the score schema.
const dummy_data = [
  ["1", "1", 40],
  ["1", "2", 50],
  ["1", "3", 70],
  ["2", "1", 50],
  ["3", "2", 20],
  ["3", "3", 60],
  ["7", "5", 40],
  ["8", "4", 50],
  ["9", "4", 70],
].map(([tester_id, test_id, score]) => ({ tester_id, test_id, score }));

已编辑：

输出应与原始集合具有相同的架构，score_schema。

例如，给定 tester_id 为 1，输出应为：

[
  {
    tester_id: "2",
    test_id: "1",
    score: 50
  }
]

解释：

tester_id 为 1 的用户一共参加了 3 个测试。只有一个文档满足条件：另一个 tester_id 参加了 tester_id 1 所参加的三个测试之一，并且其得分高于 tester_id 1 在该测试中的得分。

输出不应包含原始 tester_id 自己的文档。在本例中，即 tester_id 为 1 的文档。

Answer 1

您可以使用带 $lookup 的聚合查询（aggregate query）：

// Single round trip: for every test taken by the given tester, find the
// documents of OTHER testers on the same test whose score is greater than or
// equal to the given tester's score, and return them in the original shape.
db.collection.aggregate([
  // The scores of the tester of interest.
  { $match: { tester_id: "1" } },
  {
    $lookup: {
      from: "collection",
      // Self-join on the test that was taken. Joining tester_id to test_id
      // (as before) only produced the right rows when the two ids happened
      // to coincide.
      // NOTE: using localField/foreignField together with pipeline needs
      // MongoDB 5.0 or later.
      localField: "test_id",
      foreignField: "test_id",
      // Expose the outer document's values to the sub-pipeline as $$score / $$tester_id.
      let: { score: "$score", tester_id: "$tester_id" },
      pipeline: [
        {
          $match: {
            $expr: {
              $and: [
                // Skip the tester's own document.
                { $ne: ["$tester_id", "$$tester_id"] },
                // Keep scores that are at least as high as the tester's.
                { $gte: ["$score", "$$score"] },
              ],
            },
          },
        },
      ],
      as: "result",
    },
  },
  // One output row per matching document; tests with no match are dropped.
  { $unwind: "$result" },
  // Hoist each match to the root. The previous $group + $mergeObjects merged
  // every match into a single document, so all but one result was lost.
  { $replaceWith: "$result" },
  { $project: { _id: 0 } },
]);

输出：

[
  {
    "score": 50,
    "test_id": "1",
    "tester_id": "2"
  }
]

Playground

Answer 2

下面是处理数据的详细聚合

如果我正确理解了所有要求

// Aggregation pipeline (array of stages) that returns, for one tester, the
// documents of other testers who took the same tests and scored greater than
// or equal to that tester. Output documents have the original score shape.
[
  {
    $facet: {
      // Facet 1: every document of the selected tester, grouped into one array.
      tests_per_tester: [
        {
          $match: {
            /***   select the tester here **/
            tester_id: '1',
          },
        },
        { $group: { _id: '$tester_id', tests: { $push: '$$ROOT' } } },
      ],
      // Facet 2: all documents grouped by test. Whole documents are kept
      // (not just the score) so the owner of each score can be checked and
      // the matching document can be returned as-is.
      scores_by_test: [
        { $group: { _id: '$test_id', scores: { $push: '$$ROOT' } } },
      ],
    },
  },
  // Flatten both facets into (tester's test, candidate document) pairs.
  { $unwind: { path: '$tests_per_tester' } },
  { $unwind: { path: '$tests_per_tester.tests' } },
  { $unwind: { path: '$scores_by_test' } },
  { $unwind: { path: '$scores_by_test.scores' } },
  {
    $match: {
      $expr: {
        $and: [
          // Same test.
          {
            $eq: [
              '$tests_per_tester.tests.test_id',
              '$scores_by_test._id',
            ],
          },
          // Not the selected tester's own document.
          {
            $ne: [
              '$scores_by_test.scores.tester_id',
              '$tests_per_tester.tests.tester_id',
            ],
          },
          // Candidate score >= selected tester's score. The operands were
          // previously the other way round, which selected lower scores.
          {
            $gte: [
              '$scores_by_test.scores.score',
              '$tests_per_tester.tests.score',
            ],
          },
        ],
      },
    },
  },
  // Return the matching documents themselves, without _id.
  { $replaceWith: '$scores_by_test.scores' },
  { $unset: '_id' },
]

$facet 步骤将数据分为两类：按测试人员分组和按测试分组；查询的其余部分主要是一系列 $unwind 加上 $match，用于得出所需的结果。

Mongo playground

Answer 3

这是一种方法。（评论在查询中。）

// For tester "1": return the documents of other testers who took the same
// test and scored strictly higher, shaped like the original documents
// without _id.
db.score.aggregate([
  // Start from the documents of the tester of interest.
  { $match: { tester_id: "1" } },

  // Self-join on test_id; the sub-pipeline keeps only documents from a
  // different tester with a strictly greater score.
  {
    $lookup: {
      from: "score",
      localField: "test_id",
      foreignField: "test_id",
      let: { myId: "$tester_id", myScore: "$score" },
      pipeline: [
        {
          $match: {
            $expr: {
              $and: [
                { $ne: ["$tester_id", "$$myId"] },
                { $gt: ["$score", "$$myScore"] },
              ],
            },
          },
        },
      ],
      as: "higherScores",
    },
  },

  // Drop tests on which nobody else scored higher.
  { $match: { $expr: { $gt: [{ $size: "$higherScores" }, 0] } } },

  // From here on only the joined documents matter.
  { $project: { higherScores: 1 } },

  // There may be several higher scores per test: one row each.
  { $unwind: "$higherScores" },

  // Promote the joined document to the root.
  { $replaceWith: "$higherScores" },

  // Strip _id from the output.
  { $unset: "_id" },
]);

在 mongoplayground.net 上试用。

Answer 4

另一种无需查找的方法是使用 $group，例如：

// $lookup-free variant: group all scores by test, keep the tests taken by
// `userId`, and report the top-scoring entry of each test when it beats
// userId's own score.
// `userId` must be defined by the surrounding code (not shown here).
// NOTE(review): only one document per test is returned (the first entry
// holding the highest score), not every document scoring above userId.
db.score.aggregate([
  {
    $group: {
      _id: "$test_id",
      highestScore: { $max: "$score" },
      // Flat list of testers, used by the $match below.
      tester_id: { $push: "$tester_id" },
      // Two copies of the same (score, tester) list; each is filtered
      // differently in the next $project.
      results: {
        $push: { score: "$score", tester_id: "$tester_id" },
      },
      ourTester: {
        $push: { score: "$score", tester_id: "$tester_id" },
      },
    },
  },
  // Keep only tests whose tester_id list contains userId.
  { $match: { tester_id: userId } },
  {
    $project: {
      // userId's own entry for this test.
      ourTester: {
        $filter: {
          input: "$ourTester",
          as: "item",
          cond: { $eq: ["$$item.tester_id", userId] },
        },
      },
      // Entries that hold the highest score for this test.
      results: {
        $filter: {
          input: "$results",
          as: "item",
          cond: { $eq: ["$$item.score", "$highestScore"] },
        },
      },
    },
  },
  {
    $project: {
      ourTester: { $arrayElemAt: ["$ourTester", 0] },
      highest: { $arrayElemAt: ["$results", 0] },
    },
  },
  // Strict comparison also drops tests where userId holds the top score.
  {
    $match: {
      $expr: { $gt: ["$highest.score", "$ourTester.score"] },
    },
  },
  {
    $project: {
      // The group key carries the test id; hide it as _id and expose it as
      // test_id. The previous "$res._id" pointed at a field that does not
      // exist, so test_id was missing from the output.
      _id: 0,
      score: "$highest.score",
      tester_id: "$highest.tester_id",
      test_id: "$_id",
    },
  },
]);

如你在这里（here）所见。

使用来自查询同一集合的任意数量的过滤条件进行查询

Query with arbitrary number of filter conditions that come from querying the same collection

performance

query-optimization

mongodb

nosql

aggregation-framework