按另一个字段的值对文档进行分组
Group documents by value from another field
我有如下格式的文档:
{
"_id": <some_id>,
"code": <some_code>,
"manually_updated": {
"code": <some_code>
}
}
我想同时查看根级的 code 值和 manually_updated.code 字段来查找重复项(即对文档进行分组)。
因此,以下三个文档将被视为重复项(第二个文档的 code 被“覆盖”了:它的 manually_updated 中添加的 code 与第一个和第三个文档的 code 相同):
// Three documents that should be treated as duplicates of one another:
// documents 1 and 3 share the root code "ABCD", and document 2 carries
// that same code inside manually_updated.
[
    {
        "_id" : ObjectId("5d2dc168651ce400a327b408"),
        "code" : "ABCD",
        "manually_updated" : {}
    },
    {
        "_id" : ObjectId("5d40861411981f0068e22511"),
        "code" : "EFGH",
        "manually_updated" : {
            "code" : "ABCD"
        }
    },
    {
        "_id" : ObjectId("5d41374311981f0163779b79"),
        "code" : "ABCD",
        "manually_updated" : {}
    }
]
谢谢。
请试试这个:
// Self-join the collection so each document gathers the documents related to
// it by code, then emit one entry per root `code` whose first document has
// more than one related document.
db.getCollection("yourCollection").aggregate([
    {
        // Stage 1: for every document, collect the related documents into `data`.
        $lookup: {
            from: "yourCollection",
            let: {
                codeToBeCompared: "$code",
                manualCode: "$manually_updated.code"
            },
            pipeline: [
                {
                    $match: {
                        $expr: {
                            $or: [
                                // The other document has the same root code.
                                { $eq: ["$code", "$$codeToBeCompared"] },
                                // The other document's manual code equals this document's root code.
                                { $eq: ["$manually_updated.code", "$$codeToBeCompared"] },
                                // Both documents carry the same manual code. The $gt check only
                                // lets through documents whose manually_updated compares greater
                                // than an empty object (presumably so that two documents without
                                // any manual code do not match each other -- confirm).
                                {
                                    $and: [
                                        { $gt: ["$manually_updated", {}] },
                                        { $eq: ["$manually_updated.code", "$$manualCode"] }
                                    ]
                                }
                            ]
                        }
                    }
                }
            ],
            as: "data"
        }
    },
    {
        // Stage 2: one group per root code; keep every manually_updated value seen
        // and the first document of the group (together with its looked-up `data`).
        $group: {
            _id: "$code",
            manually_updated: { $push: "$manually_updated" },
            finalData: { $first: "$$ROOT" }
        }
    },
    {
        // Stage 3: keep only groups whose retained document matched more than one document.
        $match: {
            $expr: {
                $gt: [{ $size: "$finalData.data" }, 1]
            }
        }
    },
    {
        // Stage 4: expose the collected manual values and the related documents.
        $project: {
            "manually_updated": 1,
            "data": "$finalData.data"
        }
    }
])
示例文档:
/* 1 */
{
"_id" : ObjectId("5d2dc168651ce400a327b408"),
"code" : "ABCD",
"manually_updated" : {}
}
/* 2 */
{
"_id" : ObjectId("5d40861411981f0068e22511"),
"code" : "EFGH",
"manually_updated" : {
"code" : "ABCD"
}
}
/* 3 */
{
"_id" : ObjectId("5d41374311981f0163779b79"),
"code" : "ABCD",
"manually_updated" : {}
}
/* 4 */
{
"_id" : ObjectId("5d518a3ce8078d6134c4cd21"),
"code" : "APPPP",
"manually_updated" : {}
}
/* 5 */
{
"_id" : ObjectId("5d518a3ce8078d6134c4cd22"),
"code" : "APPPP",
"manually_updated" : {
"code" : "ABCD"
}
}
/* 6 */
{
"_id" : ObjectId("5d518a3ce8078d6134c4cd23"),
"code" : "APPPP",
"manually_updated" : {}
}
/* 7 */
{
"_id" : ObjectId("5d518a3ce8078d6134c4cd24"),
"code" : "deffffff",
"manually_updated" : {}
}
输出(注:管道最后的 $match 只保留 data 中多于 1 个文档的分组,而下面第 2 组 EFGH 的 data 只列出了 1 个文档,实际运行结果可能与此处所示不同):
/* 1 */
{
"_id" : "APPPP",
"manually_updated" : [ // Preserving this to say we've passed thru these values
{},
{
"code": "ABCD"
},
{}
],
"data" : [
{
"_id": ObjectId("5d518a3ce8078d6134c4cd21"),
"code": "APPPP",
"manually_updated": {}
},
{
"_id": ObjectId("5d518a3ce8078d6134c4cd22"),
"code": "APPPP",
"manually_updated": {
"code": "ABCD"
}
},
{
"_id": ObjectId("5d518a3ce8078d6134c4cd23"),
"code": "APPPP",
"manually_updated": {}
}
]
}
/* 2 */
{
"_id" : "EFGH",
"manually_updated" : [
{
"code": "ABCD"
}
],
"data" : [
{
"_id": ObjectId("5d40861411981f0068e22511"),
"code": "EFGH",
"manually_updated": {
"code": "ABCD"
}
}
]
}
/* 3 */
{
"_id" : "ABCD",
"manually_updated" : [
{},
{}
],
"data" : [
{
"_id": ObjectId("5d2dc168651ce400a327b408"),
"code": "ABCD",
"manually_updated": {}
},
{
"_id": ObjectId("5d40861411981f0068e22511"),
"code": "EFGH",
"manually_updated": {
"code": "ABCD"
}
},
{
"_id": ObjectId("5d41374311981f0163779b79"),
"code": "ABCD",
"manually_updated": {}
},
{
"_id": ObjectId("5d518a3ce8078d6134c4cd22"),
"code": "APPPP",
"manually_updated": {
"code": "ABCD"
}
}
]
}
此外,这个聚合会扫描整个集合;您可以把 $match 放在第一个阶段,先按特定的 code 过滤文档。
我有如下格式的文档:
{
"_id": <some_id>,
"code": <some_code>,
"manually_updated": {
"code": <some_code>
}
}
我想同时查看根级的 code 值和 manually_updated.code 字段来查找重复项(即对文档进行分组)。
因此,以下三个文档将被视为重复项(第二个文档的 code 被“覆盖”了:它的 manually_updated 中添加的 code 与第一个和第三个文档的 code 相同):
// Three documents that should be treated as duplicates of one another:
// documents 1 and 3 share the root code "ABCD", and document 2 carries
// that same code inside manually_updated.
[
    {
        "_id" : ObjectId("5d2dc168651ce400a327b408"),
        "code" : "ABCD",
        "manually_updated" : {}
    },
    {
        "_id" : ObjectId("5d40861411981f0068e22511"),
        "code" : "EFGH",
        "manually_updated" : {
            "code" : "ABCD"
        }
    },
    {
        "_id" : ObjectId("5d41374311981f0163779b79"),
        "code" : "ABCD",
        "manually_updated" : {}
    }
]
谢谢。
请试试这个:
// Self-join the collection so each document gathers the documents related to
// it by code, then emit one entry per root `code` whose first document has
// more than one related document.
db.getCollection("yourCollection").aggregate([
    {
        // Stage 1: for every document, collect the related documents into `data`.
        $lookup: {
            from: "yourCollection",
            let: {
                codeToBeCompared: "$code",
                manualCode: "$manually_updated.code"
            },
            pipeline: [
                {
                    $match: {
                        $expr: {
                            $or: [
                                // The other document has the same root code.
                                { $eq: ["$code", "$$codeToBeCompared"] },
                                // The other document's manual code equals this document's root code.
                                { $eq: ["$manually_updated.code", "$$codeToBeCompared"] },
                                // Both documents carry the same manual code. The $gt check only
                                // lets through documents whose manually_updated compares greater
                                // than an empty object (presumably so that two documents without
                                // any manual code do not match each other -- confirm).
                                {
                                    $and: [
                                        { $gt: ["$manually_updated", {}] },
                                        { $eq: ["$manually_updated.code", "$$manualCode"] }
                                    ]
                                }
                            ]
                        }
                    }
                }
            ],
            as: "data"
        }
    },
    {
        // Stage 2: one group per root code; keep every manually_updated value seen
        // and the first document of the group (together with its looked-up `data`).
        $group: {
            _id: "$code",
            manually_updated: { $push: "$manually_updated" },
            finalData: { $first: "$$ROOT" }
        }
    },
    {
        // Stage 3: keep only groups whose retained document matched more than one document.
        $match: {
            $expr: {
                $gt: [{ $size: "$finalData.data" }, 1]
            }
        }
    },
    {
        // Stage 4: expose the collected manual values and the related documents.
        $project: {
            "manually_updated": 1,
            "data": "$finalData.data"
        }
    }
])
示例文档:
/* 1 */
{
"_id" : ObjectId("5d2dc168651ce400a327b408"),
"code" : "ABCD",
"manually_updated" : {}
}
/* 2 */
{
"_id" : ObjectId("5d40861411981f0068e22511"),
"code" : "EFGH",
"manually_updated" : {
"code" : "ABCD"
}
}
/* 3 */
{
"_id" : ObjectId("5d41374311981f0163779b79"),
"code" : "ABCD",
"manually_updated" : {}
}
/* 4 */
{
"_id" : ObjectId("5d518a3ce8078d6134c4cd21"),
"code" : "APPPP",
"manually_updated" : {}
}
/* 5 */
{
"_id" : ObjectId("5d518a3ce8078d6134c4cd22"),
"code" : "APPPP",
"manually_updated" : {
"code" : "ABCD"
}
}
/* 6 */
{
"_id" : ObjectId("5d518a3ce8078d6134c4cd23"),
"code" : "APPPP",
"manually_updated" : {}
}
/* 7 */
{
"_id" : ObjectId("5d518a3ce8078d6134c4cd24"),
"code" : "deffffff",
"manually_updated" : {}
}
输出(注:管道最后的 $match 只保留 data 中多于 1 个文档的分组,而下面第 2 组 EFGH 的 data 只列出了 1 个文档,实际运行结果可能与此处所示不同):
/* 1 */
{
"_id" : "APPPP",
"manually_updated" : [ // Preserving this to say we've passed thru these values
{},
{
"code": "ABCD"
},
{}
],
"data" : [
{
"_id": ObjectId("5d518a3ce8078d6134c4cd21"),
"code": "APPPP",
"manually_updated": {}
},
{
"_id": ObjectId("5d518a3ce8078d6134c4cd22"),
"code": "APPPP",
"manually_updated": {
"code": "ABCD"
}
},
{
"_id": ObjectId("5d518a3ce8078d6134c4cd23"),
"code": "APPPP",
"manually_updated": {}
}
]
}
/* 2 */
{
"_id" : "EFGH",
"manually_updated" : [
{
"code": "ABCD"
}
],
"data" : [
{
"_id": ObjectId("5d40861411981f0068e22511"),
"code": "EFGH",
"manually_updated": {
"code": "ABCD"
}
}
]
}
/* 3 */
{
"_id" : "ABCD",
"manually_updated" : [
{},
{}
],
"data" : [
{
"_id": ObjectId("5d2dc168651ce400a327b408"),
"code": "ABCD",
"manually_updated": {}
},
{
"_id": ObjectId("5d40861411981f0068e22511"),
"code": "EFGH",
"manually_updated": {
"code": "ABCD"
}
},
{
"_id": ObjectId("5d41374311981f0163779b79"),
"code": "ABCD",
"manually_updated": {}
},
{
"_id": ObjectId("5d518a3ce8078d6134c4cd22"),
"code": "APPPP",
"manually_updated": {
"code": "ABCD"
}
}
]
}
此外,这个聚合会扫描整个集合;您可以把 $match 放在第一个阶段,先按特定的 code 过滤文档。