嵌入式文档数组 - MongoDB
Array of embedded documents - MongoDB
我有这两个 MongoDB 文档。我想求出皇家马德里和巴塞罗那的体育场容量,假设皇家马德里有两个体育场(对不起,Merengues :)
{
"_id" : "Bar.43",
"official_name" : "Futbol Club Barcelona",
"country" : "Spain",
"started_by" : {
"day" : 28,
"month" : 11,
"year" : 1899
},
"stadium" : {
"name" : "Camp Nou",
"capacity" : 99354
},
"palmarès" : {
"La Liga" : 23,
"Copa del Rey" : 27,
"Supercopa de Espana" : 11,
"UEFA Champions League" : 4,
"UEFA Cup Winners Cup" : 4,
"UEFA Super Cup" : 4,
"FIFA Club World cup" : 2
},
"uniform" : "blue and dark red"
},
{
"_id" : "RMa.103",
"official_name" : "Real Madrid Club de Fùtbol",
"country" : "Spain",
"started_by" : {
"day" : 6,
"month" : 3,
"year" : 1902
},
"stadium" : [{
"name" : "Santiago Bernabeu",
"capacity" : 85454
},
{
"name" : "Vicente Calderon",
"capacity" : 54907
}],
"palmarès" : {
"La Liga" : 32,
"Copa del Rey" : 19,
"Supercopa de Espana" : 9,
"UEFA Champions League" : 10,
"UEFA Europa League" : 2,
"UEFA Super Cup" : 2,
"FIFA Club World cup" : 4
},
"uniform" : "white"
}
嗯,我的查询是:
// The asker's original attempt: sum "$stadium.capacity" per country.
db.team.aggregate([
    {
        $group: {
            _id: "$country",
            capacityStadium: { $sum: "$stadium.capacity" }
        }
    }
])
但是不行。如果 Real Madrid 只有一个体育场,那么我的查询有效。所以,一般来说,当我有一个嵌入式文档数组并且我想使用聚合时,我必须使用 $unwind 来划分该数组的文档吗?
问题是巴塞罗那的体育场不是文档数组,查询出错。
您可能需要更改文档结构,因为 stadium 字段在每个文档中都应该是一个数组。为此,您需要找到 stadium 不是数组的文档,并使用 "Bulk" 操作更新它们以获得最大效率:
// Normalize `stadium` so that every team document stores it as an array:
// a single embedded stadium document is wrapped in a one-element array.
// Updates are queued on a bulk operation and sent to the server in batches.
var bulk = db.team.initializeOrderedBulkOp();
var count = 0;

db.team.find({
    // Skip documents with a missing or null stadium; wrapping those would
    // store [null] instead of leaving the field alone.
    "stadium": { "$ne": null },
    "stadium.0": { "$exists": false }
}).forEach(function(doc) {
    // "stadium.0" is also absent for an empty array; wrapping that would
    // produce [[]], so leave anything that is already an array untouched.
    if (Array.isArray(doc.stadium)) {
        return;
    }

    bulk.find({ "_id": doc._id })
        .updateOne({ "$set": { "stadium": [doc.stadium] }});
    count++;

    // Flush every 100 queued updates, then start a fresh bulk operation
    // for the next batch.
    if (count % 100 === 0) {
        bulk.execute();
        bulk = db.team.initializeOrderedBulkOp();
    }
});

// Send whatever is left in the final, partially filled batch.
if (count % 100 > 0) {
    bulk.execute();
}
现在使用聚合框架。
// Total stadium capacity per team; expects `stadium` to be an array in
// every document.
db.team.aggregate([
    // Keep only each stadium's capacity (plus _id, which is kept by default).
    { $project: { "stadium.capacity": 1 } },
    // Emit one document per stadium array element.
    { $unwind: "$stadium" },
    // Add the capacities back up for each team.
    { $group: { _id: "$_id", totalCapacity: { $sum: "$stadium.capacity" } } }
])
返回结果为:
{ "_id" : "RMa.103", "totalCapacity" : 140361 }
{ "_id" : "Bar.43", "totalCapacity" : 99354 }
我有这两个 MongoDB 文档。我想求出皇家马德里和巴塞罗那的体育场容量,假设皇家马德里有两个体育场(对不起,Merengues :)
{
"_id" : "Bar.43",
"official_name" : "Futbol Club Barcelona",
"country" : "Spain",
"started_by" : {
"day" : 28,
"month" : 11,
"year" : 1899
},
"stadium" : {
"name" : "Camp Nou",
"capacity" : 99354
},
"palmarès" : {
"La Liga" : 23,
"Copa del Rey" : 27,
"Supercopa de Espana" : 11,
"UEFA Champions League" : 4,
"UEFA Cup Winners Cup" : 4,
"UEFA Super Cup" : 4,
"FIFA Club World cup" : 2
},
"uniform" : "blue and dark red"
},
{
"_id" : "RMa.103",
"official_name" : "Real Madrid Club de Fùtbol",
"country" : "Spain",
"started_by" : {
"day" : 6,
"month" : 3,
"year" : 1902
},
"stadium" : [{
"name" : "Santiago Bernabeu",
"capacity" : 85454
},
{
"name" : "Vicente Calderon",
"capacity" : 54907
}],
"palmarès" : {
"La Liga" : 32,
"Copa del Rey" : 19,
"Supercopa de Espana" : 9,
"UEFA Champions League" : 10,
"UEFA Europa League" : 2,
"UEFA Super Cup" : 2,
"FIFA Club World cup" : 4
},
"uniform" : "white"
}
嗯,我的查询是:
// The asker's original attempt: sum "$stadium.capacity" per country.
db.team.aggregate([
    {
        $group: {
            _id: "$country",
            capacityStadium: { $sum: "$stadium.capacity" }
        }
    }
])
但是不行。如果 Real Madrid 只有一个体育场,那么我的查询有效。所以,一般来说,当我有一个嵌入式文档数组并且我想使用聚合时,我必须使用 $unwind 来划分该数组的文档吗? 问题是巴塞罗那的体育场不是文档数组,查询出错。
您可能需要更改文档结构,因为 stadium 字段在每个文档中都应该是一个数组。为此,您需要找到 stadium 不是数组的文档,并使用 "Bulk" 操作更新它们以获得最大效率:
// Normalize `stadium` so that every team document stores it as an array:
// a single embedded stadium document is wrapped in a one-element array.
// Updates are queued on a bulk operation and sent to the server in batches.
var bulk = db.team.initializeOrderedBulkOp();
var count = 0;

db.team.find({
    // Skip documents with a missing or null stadium; wrapping those would
    // store [null] instead of leaving the field alone.
    "stadium": { "$ne": null },
    "stadium.0": { "$exists": false }
}).forEach(function(doc) {
    // "stadium.0" is also absent for an empty array; wrapping that would
    // produce [[]], so leave anything that is already an array untouched.
    if (Array.isArray(doc.stadium)) {
        return;
    }

    bulk.find({ "_id": doc._id })
        .updateOne({ "$set": { "stadium": [doc.stadium] }});
    count++;

    // Flush every 100 queued updates, then start a fresh bulk operation
    // for the next batch.
    if (count % 100 === 0) {
        bulk.execute();
        bulk = db.team.initializeOrderedBulkOp();
    }
});

// Send whatever is left in the final, partially filled batch.
if (count % 100 > 0) {
    bulk.execute();
}
现在使用聚合框架。
// Total stadium capacity per team; expects `stadium` to be an array in
// every document.
db.team.aggregate([
    // Keep only each stadium's capacity (plus _id, which is kept by default).
    { $project: { "stadium.capacity": 1 } },
    // Emit one document per stadium array element.
    { $unwind: "$stadium" },
    // Add the capacities back up for each team.
    { $group: { _id: "$_id", totalCapacity: { $sum: "$stadium.capacity" } } }
])
返回结果为:
{ "_id" : "RMa.103", "totalCapacity" : 140361 }
{ "_id" : "Bar.43", "totalCapacity" : 99354 }