MongoDB:获取具有两个值的不同组合的最新文档
MongoDB: Get newest documents with distinct combination of two values
我有一个问题:如何针对数据库构造一个稍微复杂一点的特定查询。
我有一个集合,其中包含类似下面这样的文档:
[{
"_id": { "$oid" : "Object1" },
"created": { "$date" : "2021-11-14T10:58:01.456Z" },
"primaryId": "SomeId1",
"secondaryIdx": 0,
"otherData" : something
},
{
"_id": { "$oid" : "Object2" },
"created": { "$date" : "2021-11-13T10:58:01.456Z" },
"primaryId": "SomeId2",
"secondaryIdx": 0,
"otherData" : something
},
{
"_id": { "$oid" : "Object3" },
"created": { "$date" : "2021-11-15T10:58:01.456Z" },
"primaryId": "SomeId2",
"secondaryIdx": 1,
"otherData" : something
},
{
"_id": { "$oid" : "Object4" },
"created": { "$date" : "2021-11-16T10:58:01.456Z" },
"primaryId": "SomeId1",
"secondaryIdx": 0,
"otherData" : something
}]
我希望结果集合只包含 primaryId + secondaryIdx 组合唯一的文档。如果同一组合对应多个文档,我只想保留其中最新的一个。
所以我的结果应该是这样的:
[{
"_id": { "$oid" : "Object2" },
"created": { "$date" : "2021-11-13T10:58:01.456Z" },
"primaryId": "SomeId2",
"secondaryIdx": 0,
"otherData" : something
},
{
"_id": { "$oid" : "Object3" },
"created": { "$date" : "2021-11-15T10:58:01.456Z" },
"primaryId": "SomeId2",
"secondaryIdx": 1,
"otherData" : something
},
{
"_id": { "$oid" : "Object4" },
"created": { "$date" : "2021-11-16T10:58:01.456Z" },
"primaryId": "SomeId1",
"secondaryIdx": 0,
"otherData" : something
}]
所以 Object1 被排除在结果之外,因为 primaryId+secondaryIdx 重复,而 Object4 较新。
我目前在我的应用程序中实现了此功能,但我认为在查询级别执行此操作会更好,这样我的应用程序就不必先加载不必要的大型集合,然后又立刻把其中的大部分文档过滤掉。
您可以先按 primaryId: 1, secondaryIdx: 1, created: -1 进行 $sort,然后按 primaryId + secondaryIdx 做一个 $group,并取每组中的第一个文档。
// Keep only the newest document for every distinct (primaryId, secondaryIdx) pair.
db.collection.aggregate([
  // Stage 1: order by primaryId and secondaryIdx ascending, then by created
  // descending, so the most recently created document of each pair comes first.
  { $sort: { primaryId: 1, secondaryIdx: 1, created: -1 } },

  // Stage 2: form one group per (primaryId, secondaryIdx) pair and keep the
  // first whole document ($$ROOT) the group receives.
  {
    $group: {
      _id: { primaryId: "$primaryId", secondaryIdx: "$secondaryIdx" },
      lastDoc: { $first: "$$ROOT" }
    }
  },

  // Stage 3: promote the kept document back to the top level, dropping the
  // grouping wrapper so the output has the same shape as the stored documents.
  { $replaceRoot: { newRoot: "$lastDoc" } }
])
这里是Mongo playground供您参考。
我有一个问题:如何针对数据库构造一个稍微复杂一点的特定查询。 我有一个集合,其中包含类似下面这样的文档:
[{
"_id": { "$oid" : "Object1" },
"created": { "$date" : "2021-11-14T10:58:01.456Z" },
"primaryId": "SomeId1",
"secondaryIdx": 0,
"otherData" : something
},
{
"_id": { "$oid" : "Object2" },
"created": { "$date" : "2021-11-13T10:58:01.456Z" },
"primaryId": "SomeId2",
"secondaryIdx": 0,
"otherData" : something
},
{
"_id": { "$oid" : "Object3" },
"created": { "$date" : "2021-11-15T10:58:01.456Z" },
"primaryId": "SomeId2",
"secondaryIdx": 1,
"otherData" : something
},
{
"_id": { "$oid" : "Object4" },
"created": { "$date" : "2021-11-16T10:58:01.456Z" },
"primaryId": "SomeId1",
"secondaryIdx": 0,
"otherData" : something
}]
我希望结果集合只包含 primaryId + secondaryIdx 组合唯一的文档。如果同一组合对应多个文档,我只想保留其中最新的一个。 所以我的结果应该是这样的:
[{
"_id": { "$oid" : "Object2" },
"created": { "$date" : "2021-11-13T10:58:01.456Z" },
"primaryId": "SomeId2",
"secondaryIdx": 0,
"otherData" : something
},
{
"_id": { "$oid" : "Object3" },
"created": { "$date" : "2021-11-15T10:58:01.456Z" },
"primaryId": "SomeId2",
"secondaryIdx": 1,
"otherData" : something
},
{
"_id": { "$oid" : "Object4" },
"created": { "$date" : "2021-11-16T10:58:01.456Z" },
"primaryId": "SomeId1",
"secondaryIdx": 0,
"otherData" : something
}]
所以 Object1 被排除在结果之外,因为 primaryId+secondaryIdx 重复,而 Object4 较新。
我目前在我的应用程序中实现了此功能,但我认为在查询级别执行此操作会更好,这样我的应用程序就不必先加载不必要的大型集合,然后又立刻把其中的大部分文档过滤掉。
您可以先按 primaryId: 1, secondaryIdx: 1, created: -1 进行 $sort,然后按 primaryId + secondaryIdx 做一个 $group,并取每组中的第一个文档。
// Keep only the newest document for every distinct (primaryId, secondaryIdx) pair.
db.collection.aggregate([
  // Stage 1: order by primaryId and secondaryIdx ascending, then by created
  // descending, so the most recently created document of each pair comes first.
  { $sort: { primaryId: 1, secondaryIdx: 1, created: -1 } },

  // Stage 2: form one group per (primaryId, secondaryIdx) pair and keep the
  // first whole document ($$ROOT) the group receives.
  {
    $group: {
      _id: { primaryId: "$primaryId", secondaryIdx: "$secondaryIdx" },
      lastDoc: { $first: "$$ROOT" }
    }
  },

  // Stage 3: promote the kept document back to the top level, dropping the
  // grouping wrapper so the output has the same shape as the stored documents.
  { $replaceRoot: { newRoot: "$lastDoc" } }
])
这里是Mongo playground供您参考。