MongoDB: 如何从查询中删除重复记录?
MongoDB: How to remove duplicate records from a query?
示例集合:
员工
{"FNAME" : "John", "LNAME" : "Smith", "SSN" : "123456789", "SALARY" : 30000, "SUPERSSN" : "333445555"}
{"FNAME" : "Franklin", "LNAME" : "Wong", "SSN" : "333445555", "SALARY" : 40000, "SUPERSSN" : "888665555"}
{"FNAME" : "Joyce", "LNAME" : "English", "SSN" : "453453453", "SALARY" : 25000, "SUPERSSN" : "333445555"}
{"FNAME" : "Ramesh", "LNAME" : "Narayan", "SSN" : "666884444", "SALARY" : 38000, "SUPERSSN" : "333445555"}
{"FNAME" : "James", "LNAME" : "Borg", "SSN" : "888665555", "SALARY" : 55000, "SUPERSSN" : "", "DNO" : 1 }
{"FNAME" : "Jennifer", "LNAME" : "Wallace", "SSN" : "987654321", "SALARY" : 43000, "SUPERSSN" : "888665555"}
{"FNAME" : "Ahmad", "LNAME" : "Jabbar", "SSN" : "987987987", "SALARY" : 25000, "SUPERSSN" : "987654321"}
{"FNAME" : "Alicia", "LNAME" : "Zelaya", "SSN" : "999887777", "SALARY" : 25000, "SUPERSSN" : "987654321"}
{"FNAME" : "John", "LNAME" : "Smith", "SSN" : "123456789", "SALARY" : 30000, "SUPERSSN" : "333445555"}
{"FNAME" : "Franklin", "LNAME" : "Wong", "SSN" : "333445555", "SALARY" : 40000, "SUPERSSN" : "888665555"}
{"FNAME" : "Joyce", "LNAME" : "English", "SSN" : "453453453", "SALARY" : 25000, "SUPERSSN" : "333445555"}
{"FNAME" : "Ramesh", "LNAME" : "Narayan", "SSN" : "666884444", "SALARY" : 38000, "SUPERSSN" : "333445555"}
{"FNAME" : "James", "LNAME" : "Borg", "SSN" : "888665555", "SALARY" : 55000, "SUPERSSN" : "", "DNO" : 1 }
{"FNAME" : "Jennifer", "LNAME" : "Wallace", "SSN" : "987654321", "SALARY" : 43000, "SUPERSSN" : "888665555"}
{"FNAME" : "Ahmad", "LNAME" : "Jabbar", "SSN" : "987987987", "SALARY" : 25000, "SUPERSSN" : "987654321"}
{"FNAME" : "Alicia", "LNAME" : "Zelaya", "SSN" : "999887777", "SALARY" : 25000, "SUPERSSN" : "987654321"}
works_on
{ "ESSN" : "123456789", "PNO" : 1, "HOURS" : 32.5 }
{ "ESSN" : "123456789", "PNO" : 2, "HOURS" : 7.5 }
{ "ESSN" : "333445555", "PNO" : 2, "HOURS" : 10 }
{ "ESSN" : "333445555", "PNO" : 3, "HOURS" : 10 }
{ "ESSN" : "333445555", "PNO" : 10, "HOURS" : 10 }
{ "ESSN" : "333445555", "PNO" : 20, "HOURS" : 10 }
{ "ESSN" : "453453453", "PNO" : 1, "HOURS" : 20 }
{ "ESSN" : "453453453", "PNO" : 2, "HOURS" : 20 }
{ "ESSN" : "666884444", "PNO" : 3, "HOURS" : 40 }
{ "ESSN" : "888665555", "PNO" : 20, "HOURS" : 0 }
{ "ESSN" : "987654321", "PNO" : 20, "HOURS" : 15 }
{ "ESSN" : "987654321", "PNO" : 30, "HOURS" : 20 }
{ "ESSN" : "987987987", "PNO" : 10, "HOURS" : 35.5 }
{ "ESSN" : "987987987", "PNO" : 30, "HOURS" : 5.5 }
{ "ESSN" : "999887777", "PNO" : 10, "HOURS" : 10 }
{ "ESSN" : "999887777", "PNO" : 30, "HOURS" : 30 }
我想从以下 MongoDB 查询的“连接”中删除重复记录:
db.employee.aggregate([
{
$lookup:{
from: "works_on",
localField: "SSN",
foreignField: "ESSN",
as: "works_on_here"
}
},
{ $unwind:"$works_on_here" },
{
$group:{
_id:"$_id",
nodes:{
$addToSet:"$works_on_here"
}
},
{
$project:{
_id : 1,
FNAME : 1,
LNAME : 1,
HOURS : "$works_on_here.HOURS",
}
}
]);
预期结果是:
{ "FNAME" : "John", "LNAME" : "Smith", "HOURS" : 32.5 }
{ "FNAME" : "Franklin", "LNAME" : "Wong", "HOURS" : 10 }
{ "FNAME" : "Joyce", "LNAME" : "English", "HOURS" : 20 }
{ "FNAME" : "Ramesh", "LNAME" : "Narayan", "HOURS" : 40 }
{ "FNAME" : "James", "LNAME" : "Borg", "HOURS" : 0 }
{ "FNAME" : "Jennifer", "LNAME" : "Wallace", "HOURS" : 20 }
{ "FNAME" : "Ahmad", "LNAME" : "Jabbar", "HOURS" : 5.5 }
{ "FNAME" : "Alicia", "LNAME" : "Zelaya", "HOURS" : 30 }
{ "FNAME" : "John", "LNAME" : "Smith", "HOURS" : 7.5 }
{ "FNAME" : "Franklin", "LNAME" : "Wong", "HOURS" : 10 }
没有“$group”部分的实际输出如下所示:
{ "FNAME" : "John", "LNAME" : "Smith", "HOURS" : 32.5 }
{ "FNAME" : "John", "LNAME" : "Smith", "HOURS" : 7.5 }
{ "FNAME" : "Franklin", "LNAME" : "Wong", "HOURS" : 10 }
{ "FNAME" : "Franklin", "LNAME" : "Wong", "HOURS" : 10 }
{ "FNAME" : "Franklin", "LNAME" : "Wong", "HOURS" : 10 }
{ "FNAME" : "Franklin", "LNAME" : "Wong", "HOURS" : 10 }
{ "FNAME" : "Joyce", "LNAME" : "English", "HOURS" : 20 }
{ "FNAME" : "Joyce", "LNAME" : "English", "HOURS" : 20 }
{ "FNAME" : "Ramesh", "LNAME" : "Narayan", "HOURS" : 40 }
{ "FNAME" : "James", "LNAME" : "Borg", "HOURS" : 0 }
{ "FNAME" : "Jennifer", "LNAME" : "Wallace", "HOURS" : 15 }
{ "FNAME" : "Jennifer", "LNAME" : "Wallace", "HOURS" : 20 }
{ "FNAME" : "Ahmad", "LNAME" : "Jabbar", "HOURS" : 35.5 }
{ "FNAME" : "Ahmad", "LNAME" : "Jabbar", "HOURS" : 5.5 }
{ "FNAME" : "Alicia", "LNAME" : "Zelaya", "HOURS" : 10 }
{ "FNAME" : "Alicia", "LNAME" : "Zelaya", "HOURS" : 30 }
{ "FNAME" : "John", "LNAME" : "Smith", "HOURS" : 32.5 }
{ "FNAME" : "John", "LNAME" : "Smith", "HOURS" : 7.5 }
{ "FNAME" : "Franklin", "LNAME" : "Wong", "HOURS" : 10 }
{ "FNAME" : "Franklin", "LNAME" : "Wong", "HOURS" : 10 }
我有两个集合 'employee' 和 'works_on',我尝试做这样的事情“join”。
$group 部分的代码returns 没有。这应该用作重复过滤器或?
$group
by SSN
并获得第一个 FNAME
和 LNAME
字段,如果你想要其他字段,你可以添加与 FNAME
和 LNANE
$lookup
与 works_on
$project
显示必填字段并使用 $sum
获得总计 HOURS
总和
db.employee.aggregate([
{
$group: {
_id: "$SSN",
FNAME: { $first: "$FNAME" },
LNAME: { $first: "LNAME" }
}
},
{
$lookup: {
from: "works_on",
localField: "_id",
foreignField: "ESSN",
as: "HOURS"
}
},
{
$project: {
_id: 0,
FNAME: 1,
LNAME: 1,
HOURS: {
$sum: "$HOURS.HOURS"
}
}
}
])
示例集合:
员工
{"FNAME" : "John", "LNAME" : "Smith", "SSN" : "123456789", "SALARY" : 30000, "SUPERSSN" : "333445555"}
{"FNAME" : "Franklin", "LNAME" : "Wong", "SSN" : "333445555", "SALARY" : 40000, "SUPERSSN" : "888665555"}
{"FNAME" : "Joyce", "LNAME" : "English", "SSN" : "453453453", "SALARY" : 25000, "SUPERSSN" : "333445555"}
{"FNAME" : "Ramesh", "LNAME" : "Narayan", "SSN" : "666884444", "SALARY" : 38000, "SUPERSSN" : "333445555"}
{"FNAME" : "James", "LNAME" : "Borg", "SSN" : "888665555", "SALARY" : 55000, "SUPERSSN" : "", "DNO" : 1 }
{"FNAME" : "Jennifer", "LNAME" : "Wallace", "SSN" : "987654321", "SALARY" : 43000, "SUPERSSN" : "888665555"}
{"FNAME" : "Ahmad", "LNAME" : "Jabbar", "SSN" : "987987987", "SALARY" : 25000, "SUPERSSN" : "987654321"}
{"FNAME" : "Alicia", "LNAME" : "Zelaya", "SSN" : "999887777", "SALARY" : 25000, "SUPERSSN" : "987654321"}
{"FNAME" : "John", "LNAME" : "Smith", "SSN" : "123456789", "SALARY" : 30000, "SUPERSSN" : "333445555"}
{"FNAME" : "Franklin", "LNAME" : "Wong", "SSN" : "333445555", "SALARY" : 40000, "SUPERSSN" : "888665555"}
{"FNAME" : "Joyce", "LNAME" : "English", "SSN" : "453453453", "SALARY" : 25000, "SUPERSSN" : "333445555"}
{"FNAME" : "Ramesh", "LNAME" : "Narayan", "SSN" : "666884444", "SALARY" : 38000, "SUPERSSN" : "333445555"}
{"FNAME" : "James", "LNAME" : "Borg", "SSN" : "888665555", "SALARY" : 55000, "SUPERSSN" : "", "DNO" : 1 }
{"FNAME" : "Jennifer", "LNAME" : "Wallace", "SSN" : "987654321", "SALARY" : 43000, "SUPERSSN" : "888665555"}
{"FNAME" : "Ahmad", "LNAME" : "Jabbar", "SSN" : "987987987", "SALARY" : 25000, "SUPERSSN" : "987654321"}
{"FNAME" : "Alicia", "LNAME" : "Zelaya", "SSN" : "999887777", "SALARY" : 25000, "SUPERSSN" : "987654321"}
works_on
{ "ESSN" : "123456789", "PNO" : 1, "HOURS" : 32.5 }
{ "ESSN" : "123456789", "PNO" : 2, "HOURS" : 7.5 }
{ "ESSN" : "333445555", "PNO" : 2, "HOURS" : 10 }
{ "ESSN" : "333445555", "PNO" : 3, "HOURS" : 10 }
{ "ESSN" : "333445555", "PNO" : 10, "HOURS" : 10 }
{ "ESSN" : "333445555", "PNO" : 20, "HOURS" : 10 }
{ "ESSN" : "453453453", "PNO" : 1, "HOURS" : 20 }
{ "ESSN" : "453453453", "PNO" : 2, "HOURS" : 20 }
{ "ESSN" : "666884444", "PNO" : 3, "HOURS" : 40 }
{ "ESSN" : "888665555", "PNO" : 20, "HOURS" : 0 }
{ "ESSN" : "987654321", "PNO" : 20, "HOURS" : 15 }
{ "ESSN" : "987654321", "PNO" : 30, "HOURS" : 20 }
{ "ESSN" : "987987987", "PNO" : 10, "HOURS" : 35.5 }
{ "ESSN" : "987987987", "PNO" : 30, "HOURS" : 5.5 }
{ "ESSN" : "999887777", "PNO" : 10, "HOURS" : 10 }
{ "ESSN" : "999887777", "PNO" : 30, "HOURS" : 30 }
我想从以下 MongoDB 查询的“连接”中删除重复记录:
db.employee.aggregate([
{
$lookup:{
from: "works_on",
localField: "SSN",
foreignField: "ESSN",
as: "works_on_here"
}
},
{ $unwind:"$works_on_here" },
{
$group:{
_id:"$_id",
nodes:{
$addToSet:"$works_on_here"
}
},
{
$project:{
_id : 1,
FNAME : 1,
LNAME : 1,
HOURS : "$works_on_here.HOURS",
}
}
]);
预期结果是:
{ "FNAME" : "John", "LNAME" : "Smith", "HOURS" : 32.5 }
{ "FNAME" : "Franklin", "LNAME" : "Wong", "HOURS" : 10 }
{ "FNAME" : "Joyce", "LNAME" : "English", "HOURS" : 20 }
{ "FNAME" : "Ramesh", "LNAME" : "Narayan", "HOURS" : 40 }
{ "FNAME" : "James", "LNAME" : "Borg", "HOURS" : 0 }
{ "FNAME" : "Jennifer", "LNAME" : "Wallace", "HOURS" : 20 }
{ "FNAME" : "Ahmad", "LNAME" : "Jabbar", "HOURS" : 5.5 }
{ "FNAME" : "Alicia", "LNAME" : "Zelaya", "HOURS" : 30 }
{ "FNAME" : "John", "LNAME" : "Smith", "HOURS" : 7.5 }
{ "FNAME" : "Franklin", "LNAME" : "Wong", "HOURS" : 10 }
没有“$group”部分的实际输出如下所示:
{ "FNAME" : "John", "LNAME" : "Smith", "HOURS" : 32.5 }
{ "FNAME" : "John", "LNAME" : "Smith", "HOURS" : 7.5 }
{ "FNAME" : "Franklin", "LNAME" : "Wong", "HOURS" : 10 }
{ "FNAME" : "Franklin", "LNAME" : "Wong", "HOURS" : 10 }
{ "FNAME" : "Franklin", "LNAME" : "Wong", "HOURS" : 10 }
{ "FNAME" : "Franklin", "LNAME" : "Wong", "HOURS" : 10 }
{ "FNAME" : "Joyce", "LNAME" : "English", "HOURS" : 20 }
{ "FNAME" : "Joyce", "LNAME" : "English", "HOURS" : 20 }
{ "FNAME" : "Ramesh", "LNAME" : "Narayan", "HOURS" : 40 }
{ "FNAME" : "James", "LNAME" : "Borg", "HOURS" : 0 }
{ "FNAME" : "Jennifer", "LNAME" : "Wallace", "HOURS" : 15 }
{ "FNAME" : "Jennifer", "LNAME" : "Wallace", "HOURS" : 20 }
{ "FNAME" : "Ahmad", "LNAME" : "Jabbar", "HOURS" : 35.5 }
{ "FNAME" : "Ahmad", "LNAME" : "Jabbar", "HOURS" : 5.5 }
{ "FNAME" : "Alicia", "LNAME" : "Zelaya", "HOURS" : 10 }
{ "FNAME" : "Alicia", "LNAME" : "Zelaya", "HOURS" : 30 }
{ "FNAME" : "John", "LNAME" : "Smith", "HOURS" : 32.5 }
{ "FNAME" : "John", "LNAME" : "Smith", "HOURS" : 7.5 }
{ "FNAME" : "Franklin", "LNAME" : "Wong", "HOURS" : 10 }
{ "FNAME" : "Franklin", "LNAME" : "Wong", "HOURS" : 10 }
我有两个集合 'employee' 和 'works_on',我尝试做这样的事情“join”。
$group 部分的代码returns 没有。这应该用作重复过滤器或?
$group
bySSN
并获得第一个FNAME
和LNAME
字段,如果你想要其他字段,你可以添加与FNAME
和LNANE
$lookup
与works_on
$project
显示必填字段并使用$sum
获得总计
HOURS
总和
db.employee.aggregate([
{
$group: {
_id: "$SSN",
FNAME: { $first: "$FNAME" },
LNAME: { $first: "LNAME" }
}
},
{
$lookup: {
from: "works_on",
localField: "_id",
foreignField: "ESSN",
as: "HOURS"
}
},
{
$project: {
_id: 0,
FNAME: 1,
LNAME: 1,
HOURS: {
$sum: "$HOURS.HOURS"
}
}
}
])