如何在 mongodb 中聚合时间序列文档
How to aggregate time series documents in mongodb
我有一个 MongoDB 分片集群,用来保存来自虚拟机监控系统(Zabbix 等)的数据。现在我想从数据库中获取一些信息,例如某台虚拟机最近 2 天的平均 memfree。
我阅读了有关聚合的教程以及它与 SQL 的对比,但仍然不明白如何查询时间序列文档(文档结构按照 MongoDB 在网络研讨会上建议的方式设计)。
示例:我收集了很多这样的文档(一个文档代表 1 小时):
"_id" : ObjectId("5558ab960e8956234285de14"),
"timestamp_hour" : ISODate("2014-10-13T23:00:00.000+02:00"),
"name" : "memfree",
"unity" : "B",
"values" : {
"0" : {
"0" : 2041004032.0000000000000000,
"1" : 2041004032.0000000000000000,
"2" : 2041004032.0000000000000000,
"3" : 2041004032.0000000000000000,
"4" : 2041004032.0000000000000000,
"5" : 2041004032.0000000000000000,
"6" : 2041004032.0000000000000000,
"7" : 2041004032.0000000000000000,
"8" : 2041004032.0000000000000000,
"9" : 2041004032.0000000000000000,
"10" : 2041004032.0000000000000000,
"11" : 2041004032.0000000000000000,
"12" : 2041004032.0000000000000000,
"13" : 2041004032.0000000000000000,
"14" : 2041004032.0000000000000000,
"15" : 2041004032.0000000000000000,
"16" : 2041004032.0000000000000000,
"17" : 2041004032.0000000000000000,
"18" : 2041004032.0000000000000000,
"19" : 2041004032.0000000000000000,
"20" : 2041004032.0000000000000000,
"21" : 2041004032.0000000000000000,
"22" : 2041004032.0000000000000000,
"23" : 2041004032.0000000000000000,
"24" : 2041004032.0000000000000000,
"25" : 2041004032.0000000000000000,
"26" : 2041004032.0000000000000000,
"27" : 2041004032.0000000000000000,
"28" : 2041004032.0000000000000000,
"29" : 2041004032.0000000000000000,
"30" : 2041004032.0000000000000000,
"31" : 2041004032.0000000000000000,
"32" : 2041004032.0000000000000000,
"33" : 2041004032.0000000000000000,
"34" : 2041004032.0000000000000000,
"35" : 2041004032.0000000000000000,
"36" : 2041004032.0000000000000000,
"37" : 2041004032.0000000000000000,
"38" : 2041004032.0000000000000000,
"39" : 2041004032.0000000000000000,
"40" : 2041004032.0000000000000000,
"41" : 2041004032.0000000000000000,
"42" : 2041004032.0000000000000000,
"43" : 2041004032.0000000000000000,
"44" : 2041004032.0000000000000000,
"45" : 2041004032.0000000000000000,
"46" : 2041004032.0000000000000000,
"47" : 2041004032.0000000000000000,
"48" : 2041004032.0000000000000000,
"49" : 2041004032.0000000000000000,
"50" : 2041004032.0000000000000000,
"51" : 2041004032.0000000000000000,
"52" : 2041004032.0000000000000000,
"53" : 2041004032.0000000000000000,
"54" : 2041004032.0000000000000000,
"55" : 2041004032.0000000000000000,
"56" : 2041004032.0000000000000000,
"57" : 2041004032.0000000000000000,
"58" : 2041004032.0000000000000000,
"59" : 2041004032.0000000000000000
},
"1" : {
"0" : 2041004032.0000000000000000,
"1" : 2041004032.0000000000000000,
"2" : 2041004032.0000000000000000,
"3" : 2041004032.0000000000000000,
"4" : 2041004032.0000000000000000,
"5" : 2041004032.0000000000000000,
"6" : 2041004032.0000000000000000,
"7" : 2041004032.0000000000000000,
"8" : 2041004032.0000000000000000,
"9" : 2041004032.0000000000000000,
"10" : 2041004032.0000000000000000,
"11" : 2041004032.0000000000000000,
"12" : 2041004032.0000000000000000,
"13" : 2041004032.0000000000000000,
"14" : 2041004032.0000000000000000,
"15" : 2041004032.0000000000000000,
"16" : 2041004032.0000000000000000,
"17" : 2041004032.0000000000000000,
"18" : 2041004032.0000000000000000,
"19" : 2041004032.0000000000000000,
"20" : 2041004032.0000000000000000,
"21" : 2041004032.0000000000000000,
"22" : 2041004032.0000000000000000,
"23" : 2041004032.0000000000000000,
"24" : 2041004032.0000000000000000,
"25" : 2041004032.0000000000000000,
"26" : 2041004032.0000000000000000,
"27" : 2041004032.0000000000000000,
"28" : 2041004032.0000000000000000,
"29" : 2041004032.0000000000000000,
"30" : 2041004032.0000000000000000,
"31" : 2041004032.0000000000000000,
"32" : 2041004032.0000000000000000,
"33" : 2041004032.0000000000000000,
"34" : 2041004032.0000000000000000,
"35" : 2041004032.0000000000000000,
"36" : 2041004032.0000000000000000,
"37" : 2041004032.0000000000000000,
"38" : 2041004032.0000000000000000,
"39" : 2041004032.0000000000000000,
"40" : 2041004032.0000000000000000,
"41" : 2041004032.0000000000000000,
"42" : 2041004032.0000000000000000,
"43" : 2041004032.0000000000000000,
"44" : 2041004032.0000000000000000,
"45" : 2041004032.0000000000000000,
"46" : 2041004032.0000000000000000,
"47" : 2041004032.0000000000000000,
"48" : 2041004032.0000000000000000,
"49" : 2041004032.0000000000000000,
"50" : 2041004032.0000000000000000,
"51" : 2041004032.0000000000000000,
"52" : 2041004032.0000000000000000,
"53" : 2041004032.0000000000000000,
"54" : 2041004032.0000000000000000,
"55" : 2041004032.0000000000000000,
"56" : 2041004032.0000000000000000,
"57" : 2041004032.0000000000000000,
"58" : 2041004032.0000000000000000,
"59" : 2041004032.0000000000000000
....
我想知道从“2014-10-13T23:00:00.000”到“2014-10-15T23:00:00.000”的平均 memfree,因此需要把 13 日到 15 日之间的所有值加起来求平均(共 3600*24*2 个值)。
我认为查询大致如下,但我不知道 $avg 表达式应该怎么写:
// Question's draft pipeline: average one metric over a time window.
db.metrics.aggregate([
  {
    $match: {
      name: 'memfree',
      // Both bounds must live in ONE timestamp_hour object: a key repeated in
      // an object literal keeps only its last value, so writing the field
      // twice silently discarded the $gte lower bound.
      timestamp_hour: {
        $gte: ISODate("2014-10-13T23:00:00.000+02:00"),
        $lte: ISODate("2014-10-15T23:00:00.000+02:00")
      }
    }
  },
  {
    $group: {
      _id: "$name",
      // Placeholder kept from the question: an expression that yields every
      // nested sample under `values` is still needed here.
      avg: { $avg: "how can get all the values??" }
    }
  }
]);
有什么建议吗?
谢谢
编辑:
正确答案(适用于一个或多个指标)是:
/**
 * MapReduce map step: emits one record per stored sample of a document.
 *
 * Runs with `this` bound to the document being mapped. `this.values` is
 * walked as a two-level object (values[minute][second] -> sample) and every
 * sample is emitted under the key `this.name` as
 * `{ value: { <name>: sample }, count: { <name>: 1 } }`.
 *
 * All loop variables are declared locally; the previous implicit globals
 * (`sec`, `data`) throw a ReferenceError under strict mode.
 */
var map = function () {
  var minute;
  var second;
  var data;

  for (minute in this.values) {
    for (second in this.values[minute]) {
      data = { value: {}, count: {} };
      data.value[this.name] = this.values[minute][second];
      data.count[this.name] = 1;
      emit(this.name, data);
    }
  }
};
/**
 * MapReduce reduce step: sums values and counts per metric and derives an
 * average.
 *
 * @param {*} key - Key the values were emitted under (not used here).
 * @param {Array<Object>} values - Records shaped `{ value: {...}, count: {...} }`;
 *   either raw map output or the result of an earlier reduce pass.
 * @returns {Object} `{ value, count, avg }` where `value` and `count` hold the
 *   per-metric totals. `avg` is total/count of the last metric merged, which
 *   is the only metric when records are keyed by metric name. It stays 0 when
 *   nothing was merged.
 */
var reduce = function (key, values) {
  // Explicit seed: works for empty and one-element lists, and keeps every
  // metric already accumulated even if a later record does not mention it.
  var totals = { value: {}, count: {}, avg: 0 };
  var lastMetric;
  var partial;
  var metric;
  var i;

  for (i = 0; i < values.length; i++) {
    partial = values[i];
    for (metric in partial.value) {
      totals.value[metric] = (totals.value[metric] || 0) + partial.value[metric];
      totals.count[metric] = (totals.count[metric] || 0) + partial.count[metric];
      lastMetric = metric;
    }
  }

  if (lastMetric !== undefined) {
    totals.avg = totals.value[lastMetric] / totals.count[lastMetric];
  }
  return totals;
};
// Run the map/reduce job over the selected metrics and print the inline result.
printjson(db.node0208_26608.mapReduce(map, reduce, {
  query: {
    name: { $in: ['ioutil', 'memtotal', 'memfree'] },
    // Both bounds must live in ONE timestamp_hour object: a key repeated in an
    // object literal keeps only its last value, so writing the field twice
    // silently discarded the $gte lower bound.
    timestamp_hour: {
      $gte: ISODate("2014-09-22T00:00:00.000+02:00"),
      $lte: ISODate("2014-09-28T23:00:00.000+02:00")
    }
  },
  // To write the result into a collection instead of returning it inline:
  //   out: { merge: "map_reduce_out" },
  out: { inline: 1 },
  verbose: true
}));
产生这个结果:
{
"results" : [
{
"_id" : "ioutil",
"value" : {
"value" : {
"ioutil" : 2495762.106280909
},
"count" : {
"ioutil" : 601200
},
"avg" : 4.15130090865088
}
},
{
"_id" : "memfree",
"value" : {
"value" : {
"memfree" : 28500447903744
},
"count" : {
"memfree" : 601200
},
"avg" : 47405934.636966065
}
},
{
"_id" : "memtotal",
"value" : {
"value" : {
"memtotal" : 635834327040000
},
"count" : {
"memtotal" : 594000
},
"avg" : 1070428160
}
}
],
"counts" : {
"input" : NumberLong(499),
"emit" : NumberLong(1796400),
"reduce" : NumberLong(11),
"output" : NumberLong(3)
},
"timeMillis" : 37956,
"timing" : {
"shardProcessing" : 37948,
"postProcessing" : 8
},
"shardCounts" : {
"192.168.0.19:27017" : {
"input" : 165,
"emit" : 594000,
"reduce" : 4,
"output" : 1
},
"192.168.0.20:27017" : {
"input" : 334,
"emit" : 1202400,
"reduce" : 7,
"output" : 2
}
},
"postProcessCounts" : {
"192.168.0.21:27017" : {
"input" : NumberLong(3),
"reduce" : NumberLong(0),
"output" : NumberLong(3)
}
},
"ok" : 1
}
使用聚合框架很难实现这一点,但用 MapReduce 可以很好地完成。大致如下(未经测试):
/**
 * MapReduce map step: collects *individual* values.
 *
 * Runs with `this` bound to the document being mapped. `this.values` is
 * walked as a two-level object (values[minute][second] -> sample) and every
 * sample is emitted under the single key `null` as
 * `{ value: { <name>: sample }, count: { <name>: 1 } }`.
 *
 * The loop bodies are braced on purpose: without braces only the `data = ...`
 * assignment was repeated and `emit` ran once per document instead of once
 * per sample.
 */
var map = function () {
  var minute;
  var second;
  var data;

  for (minute in this.values) {
    for (second in this.values[minute]) {
      data = { value: {}, count: {} };
      data.value[this.name] = this.values[minute][second];
      data.count[this.name] = 1;
      emit(null, data);
    }
  }
};
/**
 * MapReduce reduce step: sums values and counts per metric.
 *
 * @param {*} key - Key the values were emitted under (not used here).
 * @param {Array<Object>} values - Records shaped `{ value: {...}, count: {...} }`;
 *   either raw map output or the result of an earlier reduce pass.
 * @returns {Object} `{ value, count }` holding the per-metric totals of every
 *   metric seen in `values`.
 */
var reduce = function (key, values) {
  // Explicit seed and braced loops: records for different metrics can meet in
  // one call (they share a key), so totals for a metric must survive records
  // that do not mention it.
  var totals = { value: {}, count: {} };
  var partial;
  var metric;
  var i;

  for (i = 0; i < values.length; i++) {
    partial = values[i];
    for (metric in partial.value) {
      totals.value[metric] = (totals.value[metric] || 0) + partial.value[metric];
      totals.count[metric] = (totals.count[metric] || 0) + partial.count[metric];
    }
  }
  return totals;
};
我有一个 MongoDB 分片集群,用来保存来自虚拟机监控系统(Zabbix 等)的数据。现在我想从数据库中获取一些信息,例如某台虚拟机最近 2 天的平均 memfree。 我阅读了有关聚合的教程以及它与 SQL 的对比,但仍然不明白如何查询时间序列文档(文档结构按照 MongoDB 在网络研讨会上建议的方式设计)。
示例:我收集了很多这样的文档(一个文档代表 1 小时):
"_id" : ObjectId("5558ab960e8956234285de14"),
"timestamp_hour" : ISODate("2014-10-13T23:00:00.000+02:00"),
"name" : "memfree",
"unity" : "B",
"values" : {
"0" : {
"0" : 2041004032.0000000000000000,
"1" : 2041004032.0000000000000000,
"2" : 2041004032.0000000000000000,
"3" : 2041004032.0000000000000000,
"4" : 2041004032.0000000000000000,
"5" : 2041004032.0000000000000000,
"6" : 2041004032.0000000000000000,
"7" : 2041004032.0000000000000000,
"8" : 2041004032.0000000000000000,
"9" : 2041004032.0000000000000000,
"10" : 2041004032.0000000000000000,
"11" : 2041004032.0000000000000000,
"12" : 2041004032.0000000000000000,
"13" : 2041004032.0000000000000000,
"14" : 2041004032.0000000000000000,
"15" : 2041004032.0000000000000000,
"16" : 2041004032.0000000000000000,
"17" : 2041004032.0000000000000000,
"18" : 2041004032.0000000000000000,
"19" : 2041004032.0000000000000000,
"20" : 2041004032.0000000000000000,
"21" : 2041004032.0000000000000000,
"22" : 2041004032.0000000000000000,
"23" : 2041004032.0000000000000000,
"24" : 2041004032.0000000000000000,
"25" : 2041004032.0000000000000000,
"26" : 2041004032.0000000000000000,
"27" : 2041004032.0000000000000000,
"28" : 2041004032.0000000000000000,
"29" : 2041004032.0000000000000000,
"30" : 2041004032.0000000000000000,
"31" : 2041004032.0000000000000000,
"32" : 2041004032.0000000000000000,
"33" : 2041004032.0000000000000000,
"34" : 2041004032.0000000000000000,
"35" : 2041004032.0000000000000000,
"36" : 2041004032.0000000000000000,
"37" : 2041004032.0000000000000000,
"38" : 2041004032.0000000000000000,
"39" : 2041004032.0000000000000000,
"40" : 2041004032.0000000000000000,
"41" : 2041004032.0000000000000000,
"42" : 2041004032.0000000000000000,
"43" : 2041004032.0000000000000000,
"44" : 2041004032.0000000000000000,
"45" : 2041004032.0000000000000000,
"46" : 2041004032.0000000000000000,
"47" : 2041004032.0000000000000000,
"48" : 2041004032.0000000000000000,
"49" : 2041004032.0000000000000000,
"50" : 2041004032.0000000000000000,
"51" : 2041004032.0000000000000000,
"52" : 2041004032.0000000000000000,
"53" : 2041004032.0000000000000000,
"54" : 2041004032.0000000000000000,
"55" : 2041004032.0000000000000000,
"56" : 2041004032.0000000000000000,
"57" : 2041004032.0000000000000000,
"58" : 2041004032.0000000000000000,
"59" : 2041004032.0000000000000000
},
"1" : {
"0" : 2041004032.0000000000000000,
"1" : 2041004032.0000000000000000,
"2" : 2041004032.0000000000000000,
"3" : 2041004032.0000000000000000,
"4" : 2041004032.0000000000000000,
"5" : 2041004032.0000000000000000,
"6" : 2041004032.0000000000000000,
"7" : 2041004032.0000000000000000,
"8" : 2041004032.0000000000000000,
"9" : 2041004032.0000000000000000,
"10" : 2041004032.0000000000000000,
"11" : 2041004032.0000000000000000,
"12" : 2041004032.0000000000000000,
"13" : 2041004032.0000000000000000,
"14" : 2041004032.0000000000000000,
"15" : 2041004032.0000000000000000,
"16" : 2041004032.0000000000000000,
"17" : 2041004032.0000000000000000,
"18" : 2041004032.0000000000000000,
"19" : 2041004032.0000000000000000,
"20" : 2041004032.0000000000000000,
"21" : 2041004032.0000000000000000,
"22" : 2041004032.0000000000000000,
"23" : 2041004032.0000000000000000,
"24" : 2041004032.0000000000000000,
"25" : 2041004032.0000000000000000,
"26" : 2041004032.0000000000000000,
"27" : 2041004032.0000000000000000,
"28" : 2041004032.0000000000000000,
"29" : 2041004032.0000000000000000,
"30" : 2041004032.0000000000000000,
"31" : 2041004032.0000000000000000,
"32" : 2041004032.0000000000000000,
"33" : 2041004032.0000000000000000,
"34" : 2041004032.0000000000000000,
"35" : 2041004032.0000000000000000,
"36" : 2041004032.0000000000000000,
"37" : 2041004032.0000000000000000,
"38" : 2041004032.0000000000000000,
"39" : 2041004032.0000000000000000,
"40" : 2041004032.0000000000000000,
"41" : 2041004032.0000000000000000,
"42" : 2041004032.0000000000000000,
"43" : 2041004032.0000000000000000,
"44" : 2041004032.0000000000000000,
"45" : 2041004032.0000000000000000,
"46" : 2041004032.0000000000000000,
"47" : 2041004032.0000000000000000,
"48" : 2041004032.0000000000000000,
"49" : 2041004032.0000000000000000,
"50" : 2041004032.0000000000000000,
"51" : 2041004032.0000000000000000,
"52" : 2041004032.0000000000000000,
"53" : 2041004032.0000000000000000,
"54" : 2041004032.0000000000000000,
"55" : 2041004032.0000000000000000,
"56" : 2041004032.0000000000000000,
"57" : 2041004032.0000000000000000,
"58" : 2041004032.0000000000000000,
"59" : 2041004032.0000000000000000
....
我想知道从“2014-10-13T23:00:00.000”到“2014-10-15T23:00:00.000”的平均 memfree,因此需要把 13 日到 15 日之间的所有值加起来求平均(共 3600*24*2 个值)。 我认为查询大致如下,但我不知道 $avg 表达式应该怎么写:
// Question's draft pipeline: average one metric over a time window.
db.metrics.aggregate([
  {
    $match: {
      name: 'memfree',
      // Both bounds must live in ONE timestamp_hour object: a key repeated in
      // an object literal keeps only its last value, so writing the field
      // twice silently discarded the $gte lower bound.
      timestamp_hour: {
        $gte: ISODate("2014-10-13T23:00:00.000+02:00"),
        $lte: ISODate("2014-10-15T23:00:00.000+02:00")
      }
    }
  },
  {
    $group: {
      _id: "$name",
      // Placeholder kept from the question: an expression that yields every
      // nested sample under `values` is still needed here.
      avg: { $avg: "how can get all the values??" }
    }
  }
]);
有什么建议吗?
谢谢
编辑: 正确答案(适用于一个或多个指标)是:
/**
 * MapReduce map step: emits one record per stored sample of a document.
 *
 * Runs with `this` bound to the document being mapped. `this.values` is
 * walked as a two-level object (values[minute][second] -> sample) and every
 * sample is emitted under the key `this.name` as
 * `{ value: { <name>: sample }, count: { <name>: 1 } }`.
 *
 * All loop variables are declared locally; the previous implicit globals
 * (`sec`, `data`) throw a ReferenceError under strict mode.
 */
var map = function () {
  var minute;
  var second;
  var data;

  for (minute in this.values) {
    for (second in this.values[minute]) {
      data = { value: {}, count: {} };
      data.value[this.name] = this.values[minute][second];
      data.count[this.name] = 1;
      emit(this.name, data);
    }
  }
};
/**
 * MapReduce reduce step: sums values and counts per metric and derives an
 * average.
 *
 * @param {*} key - Key the values were emitted under (not used here).
 * @param {Array<Object>} values - Records shaped `{ value: {...}, count: {...} }`;
 *   either raw map output or the result of an earlier reduce pass.
 * @returns {Object} `{ value, count, avg }` where `value` and `count` hold the
 *   per-metric totals. `avg` is total/count of the last metric merged, which
 *   is the only metric when records are keyed by metric name. It stays 0 when
 *   nothing was merged.
 */
var reduce = function (key, values) {
  // Explicit seed: works for empty and one-element lists, and keeps every
  // metric already accumulated even if a later record does not mention it.
  var totals = { value: {}, count: {}, avg: 0 };
  var lastMetric;
  var partial;
  var metric;
  var i;

  for (i = 0; i < values.length; i++) {
    partial = values[i];
    for (metric in partial.value) {
      totals.value[metric] = (totals.value[metric] || 0) + partial.value[metric];
      totals.count[metric] = (totals.count[metric] || 0) + partial.count[metric];
      lastMetric = metric;
    }
  }

  if (lastMetric !== undefined) {
    totals.avg = totals.value[lastMetric] / totals.count[lastMetric];
  }
  return totals;
};
// Run the map/reduce job over the selected metrics and print the inline result.
printjson(db.node0208_26608.mapReduce(map, reduce, {
  query: {
    name: { $in: ['ioutil', 'memtotal', 'memfree'] },
    // Both bounds must live in ONE timestamp_hour object: a key repeated in an
    // object literal keeps only its last value, so writing the field twice
    // silently discarded the $gte lower bound.
    timestamp_hour: {
      $gte: ISODate("2014-09-22T00:00:00.000+02:00"),
      $lte: ISODate("2014-09-28T23:00:00.000+02:00")
    }
  },
  // To write the result into a collection instead of returning it inline:
  //   out: { merge: "map_reduce_out" },
  out: { inline: 1 },
  verbose: true
}));
产生这个结果:
{
"results" : [
{
"_id" : "ioutil",
"value" : {
"value" : {
"ioutil" : 2495762.106280909
},
"count" : {
"ioutil" : 601200
},
"avg" : 4.15130090865088
}
},
{
"_id" : "memfree",
"value" : {
"value" : {
"memfree" : 28500447903744
},
"count" : {
"memfree" : 601200
},
"avg" : 47405934.636966065
}
},
{
"_id" : "memtotal",
"value" : {
"value" : {
"memtotal" : 635834327040000
},
"count" : {
"memtotal" : 594000
},
"avg" : 1070428160
}
}
],
"counts" : {
"input" : NumberLong(499),
"emit" : NumberLong(1796400),
"reduce" : NumberLong(11),
"output" : NumberLong(3)
},
"timeMillis" : 37956,
"timing" : {
"shardProcessing" : 37948,
"postProcessing" : 8
},
"shardCounts" : {
"192.168.0.19:27017" : {
"input" : 165,
"emit" : 594000,
"reduce" : 4,
"output" : 1
},
"192.168.0.20:27017" : {
"input" : 334,
"emit" : 1202400,
"reduce" : 7,
"output" : 2
}
},
"postProcessCounts" : {
"192.168.0.21:27017" : {
"input" : NumberLong(3),
"reduce" : NumberLong(0),
"output" : NumberLong(3)
}
},
"ok" : 1
}
使用聚合框架很难实现这一点,但用 MapReduce 可以很好地完成。大致如下(未经测试):
/**
 * MapReduce map step: collects *individual* values.
 *
 * Runs with `this` bound to the document being mapped. `this.values` is
 * walked as a two-level object (values[minute][second] -> sample) and every
 * sample is emitted under the single key `null` as
 * `{ value: { <name>: sample }, count: { <name>: 1 } }`.
 *
 * The loop bodies are braced on purpose: without braces only the `data = ...`
 * assignment was repeated and `emit` ran once per document instead of once
 * per sample.
 */
var map = function () {
  var minute;
  var second;
  var data;

  for (minute in this.values) {
    for (second in this.values[minute]) {
      data = { value: {}, count: {} };
      data.value[this.name] = this.values[minute][second];
      data.count[this.name] = 1;
      emit(null, data);
    }
  }
};
/**
 * MapReduce reduce step: sums values and counts per metric.
 *
 * @param {*} key - Key the values were emitted under (not used here).
 * @param {Array<Object>} values - Records shaped `{ value: {...}, count: {...} }`;
 *   either raw map output or the result of an earlier reduce pass.
 * @returns {Object} `{ value, count }` holding the per-metric totals of every
 *   metric seen in `values`.
 */
var reduce = function (key, values) {
  // Explicit seed and braced loops: records for different metrics can meet in
  // one call (they share a key), so totals for a metric must survive records
  // that do not mention it.
  var totals = { value: {}, count: {} };
  var partial;
  var metric;
  var i;

  for (i = 0; i < values.length; i++) {
    partial = values[i];
    for (metric in partial.value) {
      totals.value[metric] = (totals.value[metric] || 0) + partial.value[metric];
      totals.count[metric] = (totals.count[metric] || 0) + partial.count[metric];
    }
  }
  return totals;
};