在 MongoDB 中使用 MapReduce 连接(join)两个集合
Join two collections with MapReduce in MongoDB
我知道 MongoDB 不支持连接(join)操作,但我必须使用 mapReduce 范式来模拟聚合框架中的 $lookup。
我的两个collections是:
// Employees sample
{
"_id" : "1234",
"first_name" : "John",
"last_name" : "Bush",
"departments" :
[
{ "dep_id" : "d001", "hire_date" : "date001" },
{ "dep_id" : "d004", "hire_date" : "date004" }
]
}
{
"_id" : "5678",
"first_name" : "Johnny",
"last_name" : "Cash",
"departments" : [ { "dep_id" : "d001", "hire_date" : "date03" } ]
}
{
"_id" : "9012",
"first_name" : "Susan",
"last_name" : "Bowdy",
"departments" : [ { "dep_id" : "d004", "hire_date" : "date04" } ]
}
// Departments sample
{
"_id" : "d001",
"dep_name" : "Sales",
"employees" : [ "1234", "5678" ]
},
{
"_id" : "d004",
"dep_name" : "Quality M",
"employees" : [ "1234", "9012" ]
}
实际上我想要这样的结果:
{
"_id" : "1234",
"value" :
{
"first_name" : "John",
"departments" :
[
{ "dep_id" : "d001", "dep_name" : "Sales" },
{ "dep_id" : "d004", "dep_name" : "Quality M" }
]
}
}
{
"_id" : "5678",
"value" :
{
"first_name" : "Johnny",
"departments" : [ { "dep_id" : "d001", "dep_name" : "Sales" } ]
}
}
{
"_id" : "9012",
"value" :
{
"first_name" : "Susan",
"departments" : [ { "dep_id" : "d004", "dep_name" : "Quality M" } ]
}
}
公共字段是 dep_id
(来自员工)和 _id
(来自部门)。
我的代码如下,但它没有按我期望的方式工作。
// Map function for the Departments collection. For every employee id listed in
// the department it emits that id as the key, paired with the department's
// name. dep_id is emitted as the literal placeholder 0, not the department _id.
var mapD = function() {
  var staffIds = this.employees;
  var deptName = this.dep_name;
  var pos = 0;
  while (pos < staffIds.length) {
    emit(staffIds[pos], { dep_id: 0, dep_name: deptName });
    pos += 1;
  }
};
// Map function for the Employees collection. Emits one value per entry of the
// employee's departments array, keyed by the employee _id. Each value carries
// the entry's dep_id; dep_name is emitted as the literal placeholder 0.
var mapE = function() {
  var employeeId = this._id;
  var memberships = this.departments;
  var n = 0;
  while (n < memberships.length) {
    emit(employeeId, { dep_id: memberships[n].dep_id, dep_name: 0 });
    n += 1;
  }
};
// Reduce function shared by both map-reduce jobs.
//
// key    - the employee id the values were emitted under (not used here).
// values - array of { dep_id, dep_name } objects emitted for that key.
//
// Returns a single { dep_id, dep_name } object. Each field is taken from the
// last value whose field is neither null nor undefined; a numeric 0 counts as
// a real value and therefore also overwrites the accumulated field.
var reduceLookUp = function(key, values) {
  var result = { dep_id: 0, dep_name: 0 };
  values.forEach(function(value) {
    if (value.dep_name !== null && value.dep_name !== undefined) {
      // Read from the current element; reading `values.dep_name` (the array)
      // always yields undefined.
      result.dep_name = value.dep_name;
    }
    if (value.dep_id !== null && value.dep_id !== undefined) {
      result.dep_id = value.dep_id;
    }
  });
  return result;
};
// Run both jobs against the same output collection "joined". Both use the
// "reduce" output action, so results are written under the emitted key
// (the employee id) in that one collection.
db.Departments.mapReduce(mapD, reduceLookUp, { out: { reduce: "joined" } });
db.Employees.mapReduce(mapE, reduceLookUp, { out: { reduce: "joined" } });
非常感谢您的帮助!提前致谢。
在你的问题中,first_name
只能从 Employees
集合中获取,dep_name
只能从 Departments
集合中获取。
使用 MapReduce 或聚合框架都可以实现这一点。
1. MapReduce 解决方案
如果您按如下方式修改 map 和 reduce 函数
// Map function for the Departments collection. For every employee id listed in
// the department it emits that id as the key, with the department's _id and
// dep_name as the value.
var mapD = function() {
  var deptId = this._id;
  var deptName = this.dep_name;
  var members = this.employees;
  var k = 0;
  while (k < members.length) {
    emit(members[k], { dep_id: deptId, dep_name: deptName });
    k += 1;
  }
};
// Map function for the Employees collection. Emits exactly one value per
// employee: the first name, keyed by the employee _id.
var mapE = function() {
  var employeeId = this._id;
  var payload = { first_name: this.first_name };
  emit(employeeId, payload);
};
// Reduce function shared by both map-reduce jobs. Merges everything emitted
// for one employee id into a single { first_name, departments } object.
//
// key    - the employee id the values were emitted under (not used here).
// values - array whose elements may be any mix of:
//            { dep_id, dep_name }          emitted by mapD,
//            { first_name }                emitted by mapE,
//            { first_name?, departments? } an earlier result of this function
//                                          (re-reduce, or the existing document
//                                          when the "reduce" output action is
//                                          used).
//
// Returns an object with `first_name` (if any value carried one) and
// `departments` (if at least one department was collected).
var reduceLookUp = function(key, values) {
  var results = {};
  var departments = [];
  values.forEach(function(value) {
    // Carry over departments from an already-reduced value instead of letting
    // them be overwritten by departments collected in this pass.
    if (Array.isArray(value.departments)) {
      departments = departments.concat(value.departments);
    }
    var department = {};
    if (value.dep_id !== undefined) department.dep_id = value.dep_id;
    if (value.dep_name !== undefined) department.dep_name = value.dep_name;
    if (Object.keys(department).length > 0) departments.push(department);
    if (value.first_name !== undefined) results.first_name = value.first_name;
  });
  if (departments.length > 0) results.departments = departments;
  return results;
};
然后首先调用 MapReduce
// First job: map Departments with mapD and write the reduced values to the
// "joined" collection using the "reduce" output action.
db.Departments.mapReduce(mapD, reduceLookUp, { out: { reduce: "joined" } });
将插入到 joined
集合中
{
"_id" : "1234",
"value" :
{
"departments" :
[
{ "dep_id" : "d001", "dep_name" : "Sales" },
{ "dep_id" : "d004", "dep_name" : "Quality M" }
]
}
}
第二次调用时
// Second job: map Employees with mapE into the same "joined" collection, again
// using the "reduce" output action.
db.Employees.mapReduce(mapE, reduceLookUp, { out: { reduce: "joined" } });
应该插入
{ "_id" : "1234", "value" : { "first_name" : "John" } }
但是,根据官方文档,reduce 输出选项会:
Merge the new result with the existing result if the output collection
already exists. If an existing document has the same key as the new
result, apply the reduce function to both the new and the existing
documents and overwrite the existing document with the result
因此,在您的情况下,reduce 函数将使用参数再次调用
key = "1234",
values =
[
{
"departments" :
[
{ "dep_id" : "d001", "dep_name" : "Sales" },
{ "dep_id" : "d004", "dep_name" : "Quality M" }
]
},
{ "first_name" : "John" }
]
最终结果是
{
"_id" : "1234",
"value" :
{
"first_name" : "John",
"departments" :
[
{ "dep_id" : "d001", "dep_name" : "Sales" },
{ "dep_id" : "d004", "dep_name" : "Quality M" }
]
}
}
2. 聚合框架解决方案
对于您的问题,更好的解决方案是使用聚合框架(aggregation framework)而不是 Map-Reduce。这里可以使用 $lookup 阶段从 Employees 集合中获取一些数据:
// Builds one document per employee id, carrying the employee's first name and
// the list of departments whose `employees` array references that id.
db.Departments.aggregate([
  // One document per (department, employee id) pair.
  { $unwind: "$employees" },
  // Attach the Employees document(s) whose _id equals that employee id, as the
  // array field "employee".
  { $lookup: { from: "Employees", localField: "employees", foreignField: "_id", as: "employee" } },
  // Turn the looked-up array into a single embedded document.
  { $unwind: "$employee" },
  // Regroup by employee id, collecting each department's id and name.
  { $group: {
      _id: "$employees",
      first_name: { $first: "$employee.first_name" },
      departments: { $push: { dep_id: "$_id", dep_name: "$dep_name" } }
  } }
]);
这将导致
{
"_id" : "1234",
"first_name" : "John",
"departments" :
[
{ "dep_id" : "d001", "dep_name" : "Sales" },
{ "dep_id" : "d004", "dep_name" : "Quality M" }
]
}
我知道 MongoDB 不支持连接(join)操作,但我必须使用 mapReduce 范式来模拟聚合框架中的 $lookup。
我的两个collections是:
// Employees sample
{
"_id" : "1234",
"first_name" : "John",
"last_name" : "Bush",
"departments" :
[
{ "dep_id" : "d001", "hire_date" : "date001" },
{ "dep_id" : "d004", "hire_date" : "date004" }
]
}
{
"_id" : "5678",
"first_name" : "Johnny",
"last_name" : "Cash",
"departments" : [ { "dep_id" : "d001", "hire_date" : "date03" } ]
}
{
"_id" : "9012",
"first_name" : "Susan",
"last_name" : "Bowdy",
"departments" : [ { "dep_id" : "d004", "hire_date" : "date04" } ]
}
// Departments sample
{
"_id" : "d001",
"dep_name" : "Sales",
"employees" : [ "1234", "5678" ]
},
{
"_id" : "d004",
"dep_name" : "Quality M",
"employees" : [ "1234", "9012" ]
}
实际上我想要这样的结果:
{
"_id" : "1234",
"value" :
{
"first_name" : "John",
"departments" :
[
{ "dep_id" : "d001", "dep_name" : "Sales" },
{ "dep_id" : "d004", "dep_name" : "Quality M" }
]
}
}
{
"_id" : "5678",
"value" :
{
"first_name" : "Johnny",
"departments" : [ { "dep_id" : "d001", "dep_name" : "Sales" } ]
}
}
{
"_id" : "9012",
"value" :
{
"first_name" : "Susan",
"departments" : [ { "dep_id" : "d004", "dep_name" : "Quality M" } ]
}
}
公共字段是 dep_id
(来自员工)和 _id
(来自部门)。
我的代码如下,但它没有按我期望的方式工作。
// Map function for the Departments collection. For every employee id listed in
// the department it emits that id as the key, paired with the department's
// name. dep_id is emitted as the literal placeholder 0, not the department _id.
var mapD = function() {
  var staffIds = this.employees;
  var deptName = this.dep_name;
  var pos = 0;
  while (pos < staffIds.length) {
    emit(staffIds[pos], { dep_id: 0, dep_name: deptName });
    pos += 1;
  }
};
// Map function for the Employees collection. Emits one value per entry of the
// employee's departments array, keyed by the employee _id. Each value carries
// the entry's dep_id; dep_name is emitted as the literal placeholder 0.
var mapE = function() {
  var employeeId = this._id;
  var memberships = this.departments;
  var n = 0;
  while (n < memberships.length) {
    emit(employeeId, { dep_id: memberships[n].dep_id, dep_name: 0 });
    n += 1;
  }
};
// Reduce function shared by both map-reduce jobs.
//
// key    - the employee id the values were emitted under (not used here).
// values - array of { dep_id, dep_name } objects emitted for that key.
//
// Returns a single { dep_id, dep_name } object. Each field is taken from the
// last value whose field is neither null nor undefined; a numeric 0 counts as
// a real value and therefore also overwrites the accumulated field.
var reduceLookUp = function(key, values) {
  var result = { dep_id: 0, dep_name: 0 };
  values.forEach(function(value) {
    if (value.dep_name !== null && value.dep_name !== undefined) {
      // Read from the current element; reading `values.dep_name` (the array)
      // always yields undefined.
      result.dep_name = value.dep_name;
    }
    if (value.dep_id !== null && value.dep_id !== undefined) {
      result.dep_id = value.dep_id;
    }
  });
  return result;
};
// Run both jobs against the same output collection "joined". Both use the
// "reduce" output action, so results are written under the emitted key
// (the employee id) in that one collection.
db.Departments.mapReduce(mapD, reduceLookUp, { out: { reduce: "joined" } });
db.Employees.mapReduce(mapE, reduceLookUp, { out: { reduce: "joined" } });
非常感谢您的帮助!提前致谢。
在你的问题中,first_name
只能从 Employees
集合中获取,dep_name
只能从 Departments
集合中获取。
使用 MapReduce 或聚合框架都可以实现这一点。
1. MapReduce 解决方案
如果您按如下方式修改 map 和 reduce 函数
// Map function for the Departments collection. For every employee id listed in
// the department it emits that id as the key, with the department's _id and
// dep_name as the value.
var mapD = function() {
  var deptId = this._id;
  var deptName = this.dep_name;
  var members = this.employees;
  var k = 0;
  while (k < members.length) {
    emit(members[k], { dep_id: deptId, dep_name: deptName });
    k += 1;
  }
};
// Map function for the Employees collection. Emits exactly one value per
// employee: the first name, keyed by the employee _id.
var mapE = function() {
  var employeeId = this._id;
  var payload = { first_name: this.first_name };
  emit(employeeId, payload);
};
// Reduce function shared by both map-reduce jobs. Merges everything emitted
// for one employee id into a single { first_name, departments } object.
//
// key    - the employee id the values were emitted under (not used here).
// values - array whose elements may be any mix of:
//            { dep_id, dep_name }          emitted by mapD,
//            { first_name }                emitted by mapE,
//            { first_name?, departments? } an earlier result of this function
//                                          (re-reduce, or the existing document
//                                          when the "reduce" output action is
//                                          used).
//
// Returns an object with `first_name` (if any value carried one) and
// `departments` (if at least one department was collected).
var reduceLookUp = function(key, values) {
  var results = {};
  var departments = [];
  values.forEach(function(value) {
    // Carry over departments from an already-reduced value instead of letting
    // them be overwritten by departments collected in this pass.
    if (Array.isArray(value.departments)) {
      departments = departments.concat(value.departments);
    }
    var department = {};
    if (value.dep_id !== undefined) department.dep_id = value.dep_id;
    if (value.dep_name !== undefined) department.dep_name = value.dep_name;
    if (Object.keys(department).length > 0) departments.push(department);
    if (value.first_name !== undefined) results.first_name = value.first_name;
  });
  if (departments.length > 0) results.departments = departments;
  return results;
};
然后首先调用 MapReduce
// First job: map Departments with mapD and write the reduced values to the
// "joined" collection using the "reduce" output action.
db.Departments.mapReduce(mapD, reduceLookUp, { out: { reduce: "joined" } });
将插入到 joined
集合中
{
"_id" : "1234",
"value" :
{
"departments" :
[
{ "dep_id" : "d001", "dep_name" : "Sales" },
{ "dep_id" : "d004", "dep_name" : "Quality M" }
]
}
}
第二次调用时
// Second job: map Employees with mapE into the same "joined" collection, again
// using the "reduce" output action.
db.Employees.mapReduce(mapE, reduceLookUp, { out: { reduce: "joined" } });
应该插入
{ "_id" : "1234", "value" : { "first_name" : "John" } }
但是,根据官方文档,reduce 输出选项会:
Merge the new result with the existing result if the output collection already exists. If an existing document has the same key as the new result, apply the reduce function to both the new and the existing documents and overwrite the existing document with the result
因此,在您的情况下,reduce 函数将使用参数再次调用
key = "1234",
values =
[
{
"departments" :
[
{ "dep_id" : "d001", "dep_name" : "Sales" },
{ "dep_id" : "d004", "dep_name" : "Quality M" }
]
},
{ "first_name" : "John" }
]
最终结果是
{
"_id" : "1234",
"value" :
{
"first_name" : "John",
"departments" :
[
{ "dep_id" : "d001", "dep_name" : "Sales" },
{ "dep_id" : "d004", "dep_name" : "Quality M" }
]
}
}
2. 聚合框架解决方案
对于您的问题,更好的解决方案是使用聚合框架(aggregation framework)而不是 Map-Reduce。这里可以使用 $lookup 阶段从 Employees 集合中获取一些数据:
// Builds one document per employee id, carrying the employee's first name and
// the list of departments whose `employees` array references that id.
db.Departments.aggregate([
  // One document per (department, employee id) pair.
  { $unwind: "$employees" },
  // Attach the Employees document(s) whose _id equals that employee id, as the
  // array field "employee".
  { $lookup: { from: "Employees", localField: "employees", foreignField: "_id", as: "employee" } },
  // Turn the looked-up array into a single embedded document.
  { $unwind: "$employee" },
  // Regroup by employee id, collecting each department's id and name.
  { $group: {
      _id: "$employees",
      first_name: { $first: "$employee.first_name" },
      departments: { $push: { dep_id: "$_id", dep_name: "$dep_name" } }
  } }
]);
这将导致
{
"_id" : "1234",
"first_name" : "John",
"departments" :
[
{ "dep_id" : "d001", "dep_name" : "Sales" },
{ "dep_id" : "d004", "dep_name" : "Quality M" }
]
}