在 MongoDB/Java Spring Boot 中获取月份统计
Get month wise count in MongoDB/Java Springboot
我的 MongoDB 集合中有如下数据:
集合名称:runState
runId: 1
startTime:2020-09-16T20:56:06.598+00:00
endTime:2020-09-16T20:57:09.196+00:00
product_action: org_rhel_oracle_install
Task: completed
ranBy:David
runId: 2
startTime:2021-01-11T20:56:06.598+00:00
endTime:2021-01-11T20:56:09.196+00:00
product_action: org_rhel_oracle_install
Task: completed
ranBy:John
runId: 2
startTime:2021-01-27T20:56:06.598+00:00
endTime:2021-01-27T20:56:09.196+00:00
product_action: org_rhel_oracle_install
Task: completed
ranBy:John
runId: 3
startTime:2021-01-11T20:56:06.598+00:00
endTime:2021-01-11T20:57:09.196+00:00
product_action: org_rhel_postgres_install
Task: completed
ranBy:John
runId: 4
startTime:2021-02-09T20:56:06.598+00:00
endTime:2021-02-09T20:57:09.196+00:00
product_action: org_rhel_oracle_install
Task: completed
ranBy:John
runId: 5
startTime:2021-02-09T20:56:06.598+00:00
endTime:2021-02-09T20:57:09.196+00:00
product_action: org_rhel_postgres_install
Task: completed
ranBy:John
runId: 6
startTime:2021-09-09T20:56:06.598+00:00
endTime:2021-09-09T20:57:09.196+00:00
product_action: org_rhel_postgres_install
Task: completed
ranBy:John
runId: 7
startTime:2022-01-09T20:56:06.598+00:00
endTime:2022-01-09T20:57:09.196+00:00
product_action: org_rhel_oracle_install
Task: completed
ranBy:David
runId: 8
startTime:2022-01-10T20:56:06.598+00:00
endTime:2022-01-10T20:57:09.196+00:00
product_action: org_rhel_oracle_install
Task: failed
ranBy:David
我希望得到过去 12 个月(2021 年 1 月至 2022 年 1 月)内,每个产品已完成任务的按月计数(产品名称可从 product_action 字段中提取)。
输出应采用以下格式:
{
"_id" : "postgres",
completed: [
{
"month" : "FEB-2021",
"count" : 1
},
{
"month" : "SEP-2021",
"count" : 1
},
{
"month" : "JAN-2021",
"count" : 1
}
]
},
{
"_id" : "oracle",
"completed" : [
{
"month" : "FEB-2021",
"count" : 1
},
{
"month" : "JAN-2021",
"count" : 2
}
]
}
我从下面开始,但不确定如何像上面那样计算月份。
{"product_action":{$regex:"postgres|oracle"},"Task":"completed"}
因为我刚接触这些,有人能帮我写出得到上述结果的 MongoDB 查询,并给出在 Java Spring Boot 中实现它的代码吗?
下面是我用聚合尝试编写的 Java 代码,但它没有产生我想要的结果。
// Asker's attempt: a three-stage Spring Data MongoDB aggregation pipeline.
Aggregation agg = Aggregation.newAggregation(
// Stage 1: keep endTime, Task and product_action, and add a "month" field derived from endTime.
Aggregation.project("endTime","Task","product_action").and(DateOperators.Month.monthOf("endTime")).as("month"),
// Stage 2: keep documents whose product_action matches postgres|oracle, whose Task is "completed",
// and whose endTime is >= parseDate("2021-02-01") (parseDate is a helper not shown here).
Aggregation.match(Criteria.where("product_action").regex("postgres|oracle").and("Task").is("completed")
.and("endTime").gte(parseDate("2021-02-01"))),
// Stage 3: count documents per (month, Task). The group key contains neither the year nor the
// product, so this cannot yield the per-product, month-year breakdown the question asks for.
Aggregation.group("month","Task").count().as("count")
);
试试下面这个:
// Runs against the runState collection named in the question.
db.runState.aggregate([
    // Get the easy stuff out of the way. Filter for the desired date range
    // (2021-01-01 inclusive to 2022-01-01 exclusive) and only those items
    // that are complete:
    {$match: {$and: [
        {"endTime":{$gte:new ISODate("2021-01-01")}},
        {"endTime":{$lt:new ISODate("2022-01-01")}},
        {"Task":"completed"}
    ]} }

    // Now group by product and date expressed as month-year. The product
    // is embedded in the field value so there are a few approaches to digging
    // it out. Here, we split on underscore and take the [2] item
    // (e.g. "org_rhel_oracle_install" -> "oracle").
    ,{$group: {_id: {
        p: {$arrayElemAt:[{$split:["$product_action","_"]},2]},
        d: {$dateToString: {date: "$endTime", format: "%m-%Y"}}
      },
      n: {$sum: 1}
    }}

    // The OP seeks to make the date component nested inside the product
    // instead of having it as a two-part grouping. We will "regroup" and
    // create an array. This is slightly different than the format indicated
    // by the OP but values as keys (e.g. "Jan-2021: 2") is in general a
    // poor idea so instead we construct an array of proper name:value pairs.
    ,{$group: {_id: '$_id.p',
      completed: {$push: {d: '$_id.d', n: '$n'}}
    }}
]);
产生
{
"_id" : "postgres",
"completed" : [
{
"d" : "02-2021",
"n" : 1
},
{
"d" : "09-2021",
"n" : 1
},
{
"d" : "01-2021",
"n" : 1
}
]
}
{
"_id" : "oracle",
"completed" : [
{
"d" : "02-2021",
"n" : 1
},
{
"d" : "01-2021",
"n" : 2
}
]
}
已更新
前面已经提到,$dateToString 函数没有可以生成月份 3 个字母缩写(例如 JAN)或完整月份名(例如 January)的格式参数。与 JAN-2021、FEB-2021、APR-2021 不同,01-2021、02-2021、04-2021 这种形式仍然可以正确排序。但如果确实需要直接由数据库给出这样的输出,而不是在客户端代码中做后处理,则可以把第二个 $group 替换为如下的 $sort 和 $group:
// Ensure the NN-YYYY dates are going in increasing order. The product
// component _id.p does not matter here -- only the dates have to be
// increasing. NOTE: This is OPTIONAL with respect to changing
// NN-YYYY into MON-YYYY but almost always the follow-on question is
// how to get the completed list in date order...
// CAVEAT: _id.d is a string in month-first NN-YYYY form, so this sort
// compares the month digits before the year. It is chronological only
// while all matched dates fall within one calendar year, as they do with
// the 2021-01-01 .. 2022-01-01 $match window used in this pipeline.
,{$sort: {'_id.d':1}}
// Regroup as before but index the NN part of NN-YYYY into an
// array of 3 letter abbrevs, then reconstruct the string with the
// dash and the year component. Remember: the order of the _id
// in the doc stream coming out of $group is not deterministic
// but the array created by $push will preserve the order in
// which it was pushed -- which is the date-ascending sorted order
// from the prior stage.
,{$group: {_id: '$_id.p',
completed: {$push: {
d: {$concat: [
{$arrayElemAt:[ ['JAN','FEB','MAR',
'APR','MAY','JUN',
'JUL','AUG','SEP',
'OCT','NOV','DEC'],
// The first two characters of NN-YYYY are the month number;
// minus 1 to adjust for zero-based array:
{$subtract:[{$toInt: {$substr:['$_id.d',0,2]}},1]}
]},
"-",
// Four characters starting at index 3 (just past the dash) are the year:
{$substr:['$_id.d',3,4]}
]},
n: '$n'}}
}}
产生:
{
"_id" : "postgres",
"completed" : [
{
"d" : "JAN-2021",
"n" : 1
},
{
"d" : "FEB-2021",
"n" : 1
},
{
"d" : "SEP-2021",
"n" : 1
}
]
}
{
"_id" : "oracle",
"completed" : [
{
"d" : "JAN-2021",
"n" : 2
},
{
"d" : "FEB-2021",
"n" : 1
}
]
}
至于如何把它转换为 Java,有几种方法;但除非需要大量的程序化控制,否则最简单的做法似乎是把查询以“宽松 JSON”(键两侧不需要引号)的形式写成 Java 字符串,然后调用 Document.parse()。包含辅助函数和相应 Java 驱动程序调用的完整示例见:https://moschetti.org/rants/mongoaggcvt.html ,其要点如下:
/**
 * Accumulates the text of a single aggregation stage, written as relaxed
 * JSON, and parses the concatenated text into a {@link Document}.
 */
private static class StageHelper {
    private final StringBuilder txt = new StringBuilder();

    public StageHelper() {
    }

    /**
     * Appends one fragment of the stage.
     *
     * @param expr fragment text; single quotes are converted to double quotes
     *             so fragments can be written without escaping inside Java
     *             string literals
     * @param subs optional {@link String#format} arguments; when none are
     *             given the fragment is appended verbatim, so literal percent
     *             sequences such as {@code %m-%Y} pass through untouched
     */
    public void add(String expr, Object... subs) {
        // String is immutable: replace() returns a new string, so the result
        // must be assigned back or the conversion is silently lost.
        expr = expr.replace("'", "\""); // This is the helpful part.
        if (subs.length > 0) {
            // Format only when there is something to substitute; otherwise a
            // literal '%' in the fragment would be treated as a format specifier.
            expr = String.format(expr, subs); // this too
        }
        txt.append(expr);
    }

    /**
     * Parses everything appended so far as one JSON document.
     *
     * @return the parsed stage
     */
    public Document fetch() {
        return Document.parse(txt.toString());
    }
}
/**
 * Builds the four-stage aggregation pipeline ($match, $group, $sort, $group)
 * that counts completed runs per product and month, returning each product
 * with a "completed" array of {d: "MON-YYYY", n: count} entries.
 *
 * @return the pipeline stages, in execution order
 */
private List<Document> makePipeline() {
    List<Document> pipeline = new ArrayList<>();

    // Stage 1: completed runs with 2021-01-01 <= endTime < 2022-01-01.
    // The elements of the $and array must be comma-separated or the stage
    // text is not valid JSON and cannot be parsed.
    StageHelper s = new StageHelper();
    s.add("{$match: {$and: [ ");
    // Note use of EJSON here plus string substitution of dates. Full
    // ISO-8601 instants are used because date-only strings may not be
    // accepted by every driver version -- TODO confirm for the driver in use.
    s.add(" {endTime:{$gte: {$date: '%s'}} }, ", "2021-01-01T00:00:00Z");
    s.add(" {endTime:{$lt: {$date: '%s'}} }, ", "2022-01-01T00:00:00Z");
    s.add(" {Task:'completed'} ");
    s.add("]} } ");
    pipeline.add(s.fetch());

    // Stage 2: count per (product, MM-YYYY). The product is the third
    // underscore-separated token of product_action.
    s = new StageHelper();
    s.add("{$group: {_id: { ");
    s.add(" p: {$arrayElemAt:[{$split:['$product_action','_']},2]}, ");
    // No substitution arguments are passed on this line, so StageHelper.add
    // does not run String.format and the %m / %Y specifiers survive.
    s.add(" d: {$dateToString: {date: '$endTime', 'format': '%m-%Y'}} ");
    s.add(" }, ");
    s.add(" n: {$sum: 1} ");
    s.add("}} ");
    pipeline.add(s.fetch());

    // Stage 3: order by the MM-YYYY string. This is chronological only
    // because the $match window above lies within one calendar year.
    s = new StageHelper();
    s.add("{$sort: {'_id.d':1}} ");
    pipeline.add(s.fetch());

    // Stage 4: regroup per product, turning the MM prefix into a three-letter
    // month abbreviation and re-attaching "-YYYY".
    s = new StageHelper();
    s.add("{$group: {_id: '$_id.p', ");
    s.add(" completed: {$push: { ");
    s.add(" d: {$concat: [ ");
    s.add(" {$arrayElemAt:[ ['JAN','FEB','MAR', ");
    s.add(" 'APR','MAY','JUN', ");
    s.add(" 'JUL','AUG','SEP', ");
    s.add(" 'OCT','NOV','DEC'], ");
    // Month number minus 1 to index the zero-based array.
    s.add(" {$subtract:[{$toInt: {$substr:['$_id.d',0,2]}},1]} ");
    s.add(" ]}, ");
    s.add(" '-', ");
    s.add(" {$substr:['$_id.d',3,4]} ");
    s.add(" ]}, ");
    s.add(" n: '$n'}} ");
    s.add(" }} ");
    pipeline.add(s.fetch());

    return pipeline;
}
...
import com.mongodb.client.MongoCursor;
import com.mongodb.client.AggregateIterable;
AggregateIterable<Document> output = coll.aggregate(pipeline);
MongoCursor<Document> iterator = output.iterator();
while (iterator.hasNext()) {
Document doc = iterator.next();
我的 MongoDB 集合中有如下数据:
集合名称:runState
runId: 1
startTime:2020-09-16T20:56:06.598+00:00
endTime:2020-09-16T20:57:09.196+00:00
product_action: org_rhel_oracle_install
Task: completed
ranBy:David
runId: 2
startTime:2021-01-11T20:56:06.598+00:00
endTime:2021-01-11T20:56:09.196+00:00
product_action: org_rhel_oracle_install
Task: completed
ranBy:John
runId: 2
startTime:2021-01-27T20:56:06.598+00:00
endTime:2021-01-27T20:56:09.196+00:00
product_action: org_rhel_oracle_install
Task: completed
ranBy:John
runId: 3
startTime:2021-01-11T20:56:06.598+00:00
endTime:2021-01-11T20:57:09.196+00:00
product_action: org_rhel_postgres_install
Task: completed
ranBy:John
runId: 4
startTime:2021-02-09T20:56:06.598+00:00
endTime:2021-02-09T20:57:09.196+00:00
product_action: org_rhel_oracle_install
Task: completed
ranBy:John
runId: 5
startTime:2021-02-09T20:56:06.598+00:00
endTime:2021-02-09T20:57:09.196+00:00
product_action: org_rhel_postgres_install
Task: completed
ranBy:John
runId: 6
startTime:2021-09-09T20:56:06.598+00:00
endTime:2021-09-09T20:57:09.196+00:00
product_action: org_rhel_postgres_install
Task: completed
ranBy:John
runId: 7
startTime:2022-01-09T20:56:06.598+00:00
endTime:2022-01-09T20:57:09.196+00:00
product_action: org_rhel_oracle_install
Task: completed
ranBy:David
runId: 8
startTime:2022-01-10T20:56:06.598+00:00
endTime:2022-01-10T20:57:09.196+00:00
product_action: org_rhel_oracle_install
Task: failed
ranBy:David
我希望得到过去 12 个月(2021 年 1 月至 2022 年 1 月)内,每个产品已完成任务的按月计数(产品名称可从 product_action 字段中提取)。
输出应采用以下格式:
{
"_id" : "postgres",
completed: [
{
"month" : "FEB-2021",
"count" : 1
},
{
"month" : "SEP-2021",
"count" : 1
},
{
"month" : "JAN-2021",
"count" : 1
}
]
},
{
"_id" : "oracle",
"completed" : [
{
"month" : "FEB-2021",
"count" : 1
},
{
"month" : "JAN-2021",
"count" : 2
}
]
}
我从下面开始,但不确定如何像上面那样计算月份。
{"product_action":{$regex:"postgres|oracle"},"Task":"completed"}
因为我刚接触这些,有人能帮我写出得到上述结果的 MongoDB 查询,并给出在 Java Spring Boot 中实现它的代码吗?
下面是我用聚合尝试编写的 Java 代码,但它没有产生我想要的结果。
// Asker's attempt: a three-stage Spring Data MongoDB aggregation pipeline.
Aggregation agg = Aggregation.newAggregation(
// Stage 1: keep endTime, Task and product_action, and add a "month" field derived from endTime.
Aggregation.project("endTime","Task","product_action").and(DateOperators.Month.monthOf("endTime")).as("month"),
// Stage 2: keep documents whose product_action matches postgres|oracle, whose Task is "completed",
// and whose endTime is >= parseDate("2021-02-01") (parseDate is a helper not shown here).
Aggregation.match(Criteria.where("product_action").regex("postgres|oracle").and("Task").is("completed")
.and("endTime").gte(parseDate("2021-02-01"))),
// Stage 3: count documents per (month, Task). The group key contains neither the year nor the
// product, so this cannot yield the per-product, month-year breakdown the question asks for.
Aggregation.group("month","Task").count().as("count")
);
试试下面这个:
// Runs against the runState collection named in the question.
db.runState.aggregate([
    // Get the easy stuff out of the way. Filter for the desired date range
    // (2021-01-01 inclusive to 2022-01-01 exclusive) and only those items
    // that are complete:
    {$match: {$and: [
        {"endTime":{$gte:new ISODate("2021-01-01")}},
        {"endTime":{$lt:new ISODate("2022-01-01")}},
        {"Task":"completed"}
    ]} }

    // Now group by product and date expressed as month-year. The product
    // is embedded in the field value so there are a few approaches to digging
    // it out. Here, we split on underscore and take the [2] item
    // (e.g. "org_rhel_oracle_install" -> "oracle").
    ,{$group: {_id: {
        p: {$arrayElemAt:[{$split:["$product_action","_"]},2]},
        d: {$dateToString: {date: "$endTime", format: "%m-%Y"}}
      },
      n: {$sum: 1}
    }}

    // The OP seeks to make the date component nested inside the product
    // instead of having it as a two-part grouping. We will "regroup" and
    // create an array. This is slightly different than the format indicated
    // by the OP but values as keys (e.g. "Jan-2021: 2") is in general a
    // poor idea so instead we construct an array of proper name:value pairs.
    ,{$group: {_id: '$_id.p',
      completed: {$push: {d: '$_id.d', n: '$n'}}
    }}
]);
产生
{
"_id" : "postgres",
"completed" : [
{
"d" : "02-2021",
"n" : 1
},
{
"d" : "09-2021",
"n" : 1
},
{
"d" : "01-2021",
"n" : 1
}
]
}
{
"_id" : "oracle",
"completed" : [
{
"d" : "02-2021",
"n" : 1
},
{
"d" : "01-2021",
"n" : 2
}
]
}
已更新
前面已经提到,$dateToString 函数没有可以生成月份 3 个字母缩写(例如 JAN)或完整月份名(例如 January)的格式参数。与 JAN-2021、FEB-2021、APR-2021 不同,01-2021、02-2021、04-2021 这种形式仍然可以正确排序。但如果确实需要直接由数据库给出这样的输出,而不是在客户端代码中做后处理,则可以把第二个 $group 替换为如下的 $sort 和 $group:
// Ensure the NN-YYYY dates are going in increasing order. The product
// component _id.p does not matter here -- only the dates have to be
// increasing. NOTE: This is OPTIONAL with respect to changing
// NN-YYYY into MON-YYYY but almost always the follow-on question is
// how to get the completed list in date order...
// CAVEAT: _id.d is a string in month-first NN-YYYY form, so this sort
// compares the month digits before the year. It is chronological only
// while all matched dates fall within one calendar year, as they do with
// the 2021-01-01 .. 2022-01-01 $match window used in this pipeline.
,{$sort: {'_id.d':1}}
// Regroup as before but index the NN part of NN-YYYY into an
// array of 3 letter abbrevs, then reconstruct the string with the
// dash and the year component. Remember: the order of the _id
// in the doc stream coming out of $group is not deterministic
// but the array created by $push will preserve the order in
// which it was pushed -- which is the date-ascending sorted order
// from the prior stage.
,{$group: {_id: '$_id.p',
completed: {$push: {
d: {$concat: [
{$arrayElemAt:[ ['JAN','FEB','MAR',
'APR','MAY','JUN',
'JUL','AUG','SEP',
'OCT','NOV','DEC'],
// The first two characters of NN-YYYY are the month number;
// minus 1 to adjust for zero-based array:
{$subtract:[{$toInt: {$substr:['$_id.d',0,2]}},1]}
]},
"-",
// Four characters starting at index 3 (just past the dash) are the year:
{$substr:['$_id.d',3,4]}
]},
n: '$n'}}
}}
产生:
{
"_id" : "postgres",
"completed" : [
{
"d" : "JAN-2021",
"n" : 1
},
{
"d" : "FEB-2021",
"n" : 1
},
{
"d" : "SEP-2021",
"n" : 1
}
]
}
{
"_id" : "oracle",
"completed" : [
{
"d" : "JAN-2021",
"n" : 2
},
{
"d" : "FEB-2021",
"n" : 1
}
]
}
至于如何把它转换为 Java,有几种方法;但除非需要大量的程序化控制,否则最简单的做法似乎是把查询以“宽松 JSON”(键两侧不需要引号)的形式写成 Java 字符串,然后调用 Document.parse()。包含辅助函数和相应 Java 驱动程序调用的完整示例见:https://moschetti.org/rants/mongoaggcvt.html ,其要点如下:
/**
 * Accumulates the text of a single aggregation stage, written as relaxed
 * JSON, and parses the concatenated text into a {@link Document}.
 */
private static class StageHelper {
    private final StringBuilder txt = new StringBuilder();

    public StageHelper() {
    }

    /**
     * Appends one fragment of the stage.
     *
     * @param expr fragment text; single quotes are converted to double quotes
     *             so fragments can be written without escaping inside Java
     *             string literals
     * @param subs optional {@link String#format} arguments; when none are
     *             given the fragment is appended verbatim, so literal percent
     *             sequences such as {@code %m-%Y} pass through untouched
     */
    public void add(String expr, Object... subs) {
        // String is immutable: replace() returns a new string, so the result
        // must be assigned back or the conversion is silently lost.
        expr = expr.replace("'", "\""); // This is the helpful part.
        if (subs.length > 0) {
            // Format only when there is something to substitute; otherwise a
            // literal '%' in the fragment would be treated as a format specifier.
            expr = String.format(expr, subs); // this too
        }
        txt.append(expr);
    }

    /**
     * Parses everything appended so far as one JSON document.
     *
     * @return the parsed stage
     */
    public Document fetch() {
        return Document.parse(txt.toString());
    }
}
/**
 * Builds the four-stage aggregation pipeline ($match, $group, $sort, $group)
 * that counts completed runs per product and month, returning each product
 * with a "completed" array of {d: "MON-YYYY", n: count} entries.
 *
 * @return the pipeline stages, in execution order
 */
private List<Document> makePipeline() {
    List<Document> pipeline = new ArrayList<>();

    // Stage 1: completed runs with 2021-01-01 <= endTime < 2022-01-01.
    // The elements of the $and array must be comma-separated or the stage
    // text is not valid JSON and cannot be parsed.
    StageHelper s = new StageHelper();
    s.add("{$match: {$and: [ ");
    // Note use of EJSON here plus string substitution of dates. Full
    // ISO-8601 instants are used because date-only strings may not be
    // accepted by every driver version -- TODO confirm for the driver in use.
    s.add(" {endTime:{$gte: {$date: '%s'}} }, ", "2021-01-01T00:00:00Z");
    s.add(" {endTime:{$lt: {$date: '%s'}} }, ", "2022-01-01T00:00:00Z");
    s.add(" {Task:'completed'} ");
    s.add("]} } ");
    pipeline.add(s.fetch());

    // Stage 2: count per (product, MM-YYYY). The product is the third
    // underscore-separated token of product_action.
    s = new StageHelper();
    s.add("{$group: {_id: { ");
    s.add(" p: {$arrayElemAt:[{$split:['$product_action','_']},2]}, ");
    // No substitution arguments are passed on this line, so StageHelper.add
    // does not run String.format and the %m / %Y specifiers survive.
    s.add(" d: {$dateToString: {date: '$endTime', 'format': '%m-%Y'}} ");
    s.add(" }, ");
    s.add(" n: {$sum: 1} ");
    s.add("}} ");
    pipeline.add(s.fetch());

    // Stage 3: order by the MM-YYYY string. This is chronological only
    // because the $match window above lies within one calendar year.
    s = new StageHelper();
    s.add("{$sort: {'_id.d':1}} ");
    pipeline.add(s.fetch());

    // Stage 4: regroup per product, turning the MM prefix into a three-letter
    // month abbreviation and re-attaching "-YYYY".
    s = new StageHelper();
    s.add("{$group: {_id: '$_id.p', ");
    s.add(" completed: {$push: { ");
    s.add(" d: {$concat: [ ");
    s.add(" {$arrayElemAt:[ ['JAN','FEB','MAR', ");
    s.add(" 'APR','MAY','JUN', ");
    s.add(" 'JUL','AUG','SEP', ");
    s.add(" 'OCT','NOV','DEC'], ");
    // Month number minus 1 to index the zero-based array.
    s.add(" {$subtract:[{$toInt: {$substr:['$_id.d',0,2]}},1]} ");
    s.add(" ]}, ");
    s.add(" '-', ");
    s.add(" {$substr:['$_id.d',3,4]} ");
    s.add(" ]}, ");
    s.add(" n: '$n'}} ");
    s.add(" }} ");
    pipeline.add(s.fetch());

    return pipeline;
}
...
import com.mongodb.client.MongoCursor;
import com.mongodb.client.AggregateIterable;
AggregateIterable<Document> output = coll.aggregate(pipeline);
MongoCursor<Document> iterator = output.iterator();
while (iterator.hasNext()) {
Document doc = iterator.next();