MongoDB Count() 与聚合
MongoDB Count() vs. Aggregation
我在 mongo 中大量使用聚合,也了解它在分组计数等方面的性能优势。但是,下面这两种统计集合中所有文档数量的方法,在性能上有什么区别吗?
// Count every document with the aggregation pipeline:
// the empty $match keeps all documents, and $group with _id: null
// collapses them into a single group whose `count` adds 1 per document.
collection.aggregate([
{
$match: {}
},{
$group: {
_id: null,
count: {$sum: 1}
}
}]);
和
// Count every document by running an unfiltered find() and asking the cursor for its count.
collection.find({}).count()
更新:第二种情况:
假设我们有这个示例数据:
{_id: 1, type: 'one', value: true}
{_id: 2, type: 'two', value: false}
{_id: 4, type: 'five', value: false}
使用 aggregate():
// Aggregation variant: keep the documents whose _id is in _ids and whose
// value is false, then group them by type and count the documents per group.
var _ids = ['id1', 'id2', 'id3'];
var counted = Collections.mail.aggregate([
{
'$match': {
_id: {
'$in': _ids
},
value: false
}
}, {
'$group': {
_id: "$type",
count: {
'$sum': 1
}
}
}
]);
使用 count():
// count() variant: for each id, count the documents matching that id,
// value === false and the chosen type, issuing one find().count() per id.
// Relies on `_ids` and `Collections.mail` being defined by the surrounding code.
var counted = {};
var type = 'two';
// Declare the loop variables with `var` so they do not leak as implicit
// globals (an undeclared assignment throws a ReferenceError in strict mode).
for (var i = 0, len = _ids.length; i < len; i++) {
  counted[_ids[i]] = Collections.mail.find({
    _id: _ids[i],
    value: false,
    type: type
  }).count();
}
.count() 的速度要快得多。您可以通过执行以下语句来查看它的实现:
// Note the missing parentheses at the end
db.collection.count
它返回游标的长度。对于默认查询(即调用 count() 时没有传入查询文档),其实现又是直接返回 _id_ 索引的长度(如果我没记错的话)。
然而,聚合会读取每一个文档并对其进行处理。只有在文档数量仅为大约 10 万时,它的耗时才可能勉强与 .count() 处于同一数量级(具体取决于您的内存大小)。
以下函数应用于包含 1200 万条条目的集合:
/**
 * Times two ways of counting every document in a collection -- count() and
 * a $group aggregation -- and records one stats document per pass.
 *
 * Uses the shell globals `db` and `print`. Stats are written to the
 * collection named `col + 'STATS'`, which is emptied first.
 *
 * @param {string} col - Name of the collection to benchmark (looked up as db[col]).
 * @param {number} iterations - Number of timed passes to run for each method.
 * @returns {*} Whatever stats.aggregate() returns for the per-type
 *     average of the recorded durations (milliseconds).
 */
function checkSpeed(col, iterations) {
  // Get the collection
  var collectionUnderTest = db[col];
  // The collection we are writing our stats to
  var stats = db[col + 'STATS'];
  // remove old stats
  stats.remove({});

  // Declared once up front and reused by both loops
  var start;
  var duration;

  print("Counting with count()");
  for (var i = 1; i <= iterations; i++) {
    start = new Date().getTime();
    var result = collectionUnderTest.count();
    duration = new Date().getTime() - start;
    stats.insert({"type": "count", "pass": i, "duration": duration, "count": result});
  }

  print("Counting with aggregation");
  for (var j = 1; j <= iterations; j++) {
    start = new Date().getTime();
    var cursor = collectionUnderTest.aggregate([{ $group: { _id: null, count: { $sum: 1 } } }]);
    duration = new Date().getTime() - start;
    // aggregate() hands back a cursor, not the result document, so the
    // count has to be read from the first (and only) document it yields.
    // An empty collection produces no group at all; record that as 0.
    var doc = cursor.hasNext() ? cursor.next() : null;
    var aggregatedCount = doc ? doc.count : 0;
    stats.insert({"type": "aggregation", "pass": j, "duration": duration, "count": aggregatedCount});
  }

  // Average the recorded durations per counting method
  var averages = stats.aggregate([
    {$group: {_id: "$type", "average": {"$avg": "$duration"}}}
  ]);
  return averages;
}
并返回:
{ "_id" : "aggregation", "average" : 43828.8 }
{ "_id" : "count", "average" : 0.6 }
单位是毫秒
hth
我在 mongo 中大量使用聚合,也了解它在分组计数等方面的性能优势。但是,下面这两种统计集合中所有文档数量的方法,在性能上有什么区别吗?
// Count every document with the aggregation pipeline:
// the empty $match keeps all documents, and $group with _id: null
// collapses them into a single group whose `count` adds 1 per document.
collection.aggregate([
{
$match: {}
},{
$group: {
_id: null,
count: {$sum: 1}
}
}]);
和
// Count every document by running an unfiltered find() and asking the cursor for its count.
collection.find({}).count()
更新:第二种情况: 假设我们有这个示例数据:
{_id: 1, type: 'one', value: true}
{_id: 2, type: 'two', value: false}
{_id: 4, type: 'five', value: false}
使用 aggregate():
// Aggregation variant: keep the documents whose _id is in _ids and whose
// value is false, then group them by type and count the documents per group.
var _ids = ['id1', 'id2', 'id3'];
var counted = Collections.mail.aggregate([
{
'$match': {
_id: {
'$in': _ids
},
value: false
}
}, {
'$group': {
_id: "$type",
count: {
'$sum': 1
}
}
}
]);
使用 count():
// count() variant: for each id, count the documents matching that id,
// value === false and the chosen type, issuing one find().count() per id.
// Relies on `_ids` and `Collections.mail` being defined by the surrounding code.
var counted = {};
var type = 'two';
// Declare the loop variables with `var` so they do not leak as implicit
// globals (an undeclared assignment throws a ReferenceError in strict mode).
for (var i = 0, len = _ids.length; i < len; i++) {
  counted[_ids[i]] = Collections.mail.find({
    _id: _ids[i],
    value: false,
    type: type
  }).count();
}
.count() 的速度要快得多。您可以通过执行以下语句来查看它的实现:
// Note the missing parentheses at the end
db.collection.count
它返回游标的长度。对于默认查询(即调用 count() 时没有传入查询文档),其实现又是直接返回 _id_ 索引的长度(如果我没记错的话)。
然而,聚合会读取每一个文档并对其进行处理。只有在文档数量仅为大约 10 万时,它的耗时才可能勉强与 .count() 处于同一数量级(具体取决于您的内存大小)。
以下函数应用于包含 1200 万条条目的集合:
/**
 * Times two ways of counting every document in a collection -- count() and
 * a $group aggregation -- and records one stats document per pass.
 *
 * Uses the shell globals `db` and `print`. Stats are written to the
 * collection named `col + 'STATS'`, which is emptied first.
 *
 * @param {string} col - Name of the collection to benchmark (looked up as db[col]).
 * @param {number} iterations - Number of timed passes to run for each method.
 * @returns {*} Whatever stats.aggregate() returns for the per-type
 *     average of the recorded durations (milliseconds).
 */
function checkSpeed(col, iterations) {
  // Get the collection
  var collectionUnderTest = db[col];
  // The collection we are writing our stats to
  var stats = db[col + 'STATS'];
  // remove old stats
  stats.remove({});

  // Declared once up front and reused by both loops
  var start;
  var duration;

  print("Counting with count()");
  for (var i = 1; i <= iterations; i++) {
    start = new Date().getTime();
    var result = collectionUnderTest.count();
    duration = new Date().getTime() - start;
    stats.insert({"type": "count", "pass": i, "duration": duration, "count": result});
  }

  print("Counting with aggregation");
  for (var j = 1; j <= iterations; j++) {
    start = new Date().getTime();
    var cursor = collectionUnderTest.aggregate([{ $group: { _id: null, count: { $sum: 1 } } }]);
    duration = new Date().getTime() - start;
    // aggregate() hands back a cursor, not the result document, so the
    // count has to be read from the first (and only) document it yields.
    // An empty collection produces no group at all; record that as 0.
    var doc = cursor.hasNext() ? cursor.next() : null;
    var aggregatedCount = doc ? doc.count : 0;
    stats.insert({"type": "aggregation", "pass": j, "duration": duration, "count": aggregatedCount});
  }

  // Average the recorded durations per counting method
  var averages = stats.aggregate([
    {$group: {_id: "$type", "average": {"$avg": "$duration"}}}
  ]);
  return averages;
}
并返回:
{ "_id" : "aggregation", "average" : 43828.8 }
{ "_id" : "count", "average" : 0.6 }
单位是毫秒
hth