在 MongoDB/Java Spring Boot 中获取月份统计

Get month wise count in MongoDB/Java Springboot

我在 mongodb 中收集了以下数据:

集合名称:runState

runId: 1
startTime:2020-09-16T20:56:06.598+00:00
endTime:2020-09-16T20:57:09.196+00:00
product_action: org_rhel_oracle_install
Task: completed
ranBy:David

runId: 2
startTime:2021-01-11T20:56:06.598+00:00
endTime:2021-01-11T20:56:09.196+00:00
product_action: org_rhel_oracle_install
Task: completed
ranBy:John

runId: 2
startTime:2021-01-27T20:56:06.598+00:00
endTime:2021-01-27T20:56:09.196+00:00
product_action: org_rhel_oracle_install
Task: completed
ranBy:John

runId: 3
startTime:2021-01-11T20:56:06.598+00:00
endTime:2021-01-11T20:57:09.196+00:00
product_action: org_rhel_postgres_install
Task: completed
ranBy:John

runId: 4
startTime:2021-02-09T20:56:06.598+00:00
endTime:2021-02-09T20:57:09.196+00:00
product_action: org_rhel_oracle_install
Task: completed
ranBy:John

runId: 5
startTime:2021-02-09T20:56:06.598+00:00
endTime:2021-02-09T20:57:09.196+00:00
product_action: org_rhel_postgres_install
Task: completed
ranBy:John

runId: 6
startTime:2021-09-09T20:56:06.598+00:00
endTime:2021-09-09T20:57:09.196+00:00
product_action: org_rhel_postgres_install
Task: completed
ranBy:John

runId: 7
startTime:2022-01-09T20:56:06.598+00:00
endTime:2022-01-09T20:57:09.196+00:00
product_action: org_rhel_oracle_install
Task: completed
ranBy:David

runId: 8
startTime:2022-01-10T20:56:06.598+00:00
endTime:2022-01-10T20:57:09.196+00:00
product_action: org_rhel_oracle_install
Task: failed
ranBy:David

我想要输出作为过去 12 个月(2021 年 1 月到 2022 年 1 月)每个完成任务的产品的计数(产品可从 product_action 获取)

输出应采用以下格式:

{
"_id" : "postgres",
completed: [
        {
            "month" : "FEB-2021",
            "count" : 1
        },
        {
            "month" : "SEP-2021",
            "count" : 1
        },
        {
            "month" : "JAN-2021",
            "count" : 1
        }
]
},
{
"_id" : "oracle",
    "completed" : [
        {
            "month" : "FEB-2021",
            "count" : 1
        },
        {
            "month" : "JAN-2021",
            "count" : 2
        }
    ]
}

我从下面开始,但不确定如何像上面那样计算月份。

{"product_action":{$regex:"postgres|oracle"},"Task":"completed"}

因为这对我来说是新的,有人可以帮助我 mongo 数据库查询以获得结果并在 Java springboot 中编写代码来实现这个吗?

Java 我尝试使用聚合的代码,但这并没有产生我想要的结果。

Aggregation agg = Aggregation.newAggregation(
                Aggregation.project("endTime","Task","product_action").and(DateOperators.Month.monthOf("endTime")).as("month"),
                Aggregation.match(Criteria.where("product_action").regex("postgres|oracle").and("Task").is("completed")
                        .and("endTime").gte(parseDate("2021-02-01"))),
                Aggregation.group("month","Task").count().as("count")
                );

试穿尺码:

db.foo.aggregate([
    // Get easy stuff out way. Filter for the desired date range and only
    // those items that are complete:
    {$match: {$and: [
        {"endTime":{$gte:new ISODate("2021-01-01")}},
        {"endTime":{$lt:new ISODate("2022-01-01")}},
        {"Task":"completed"}
    ]} }

    // Now group by product and date expressed as month-year.  The product
    // is embedded in the field value so there are a few approaches to digging
    // it out.  Here, we split on underscore and take the [2] item.
    ,{$group: {_id: {
        p: {$arrayElemAt:[{$split:["$product_action","_"]},2]},
        d: {$dateToString: {date: "$endTime", format: "%m-%Y"}}
               },
               n: {$sum: 1}
    }}

    // The OP seeks to make the date component nested inside the product
    // instead of having it as a two-part grouping.  We will "regroup" and
    // create an array.  This is slightly different than the format indicated
    // by the OP but values as keys (e.g. "Jan-2021: 2") is in general a 
    // poor idea so instead we construct an array of proper name:value pairs.
    ,{$group: {_id: '$_id.p',
               completed: {$push: {d: '$_id.d', n: '$n'}}
    }}

]);

产生

{
    "_id" : "postgres",
    "completed" : [
        {
            "d" : "02-2021",
            "n" : 1
        },
        {
            "d" : "09-2021",
            "n" : 1
        },
        {
            "d" : "01-2021",
            "n" : 1
        }
    ]
}
{
    "_id" : "oracle",
    "completed" : [
        {
            "d" : "02-2021",
            "n" : 1
        },
        {
            "d" : "01-2021",
            "n" : 2
        }
    ]
}

已更新

之前有人提出 $dateToString 函数没有格式参数来生成一个月的 3 个字母缩写,例如JAN(或长格式,例如 January)。排序仍然适用于 01-2021,02-2021,04-2021JAN-2021,FEB-2021,APR-2021 但如果确实需要直接从数据库而不是 client-side 代码中的 post-processing 这样的输出,则第二组被替换为$sort$group如下:

    // Ensure the NN-YYYY dates are going in increasing order.  The product
    // component _id.p does not matter here -- only the dates have to be
    // increasing.  NOTE:  This is OPTIONAL with respect to changing
    // NN-YYYY into MON-YYYY but almost always the follow on question is
    // how to get the completed list in date order...
    ,{$sort: {'_id.d':1}}

    // Regroup as before but index the NN part of NN-YYYY into an
    // array of 3 letter abbrevs, then reconstruct the string with the
    // dash and the year component.  Remember: the order of the _id 
    // in the doc stream coming out of $group is not deterministic
    // but the array created by $push will preserve the order in
    // which it was pushed -- which is the date-ascending sorted order
    // from the prior stage.
    ,{$group: {_id: '$_id.p',
               completed: {$push: {
                   d: {$concat: [
                       {$arrayElemAt:[ ['JAN','FEB','MAR',
                                        'APR','MAY','JUN',
                                        'JUL','AUG','SEP',
                                        'OCT','NOV','DEC'],
                       // minus 1 to adjust for zero-based array:
                       {$subtract:[{$toInt: {$substr:['$_id.d',0,2]}},1]}
                             ]},
                       "-",
                       {$substr:['$_id.d',3,4]}
                   ]},
                   n: '$n'}}
              }}

产生:

{
    "_id" : "postgres",
    "completed" : [
        {
            "d" : "JAN-2021",
            "n" : 1
        },
        {
            "d" : "FEB-2021",
            "n" : 1
        },
        {
            "d" : "SEP-2021",
            "n" : 1
        }
    ]
}
{
    "_id" : "oracle",
    "completed" : [
        {
            "d" : "JAN-2021",
            "n" : 2
        },
        {
            "d" : "FEB-2021",
            "n" : 1
        }
    ]
}

至于将其转换为 Java,有几种方法,但除非需要大量的程序控制,否则将查询捕获为“宽松的 JSON”(键周围不需要引号) 在 Java 中的字符串中并调用 Document.parse() 似乎是最简单的方法。可以在此处找到包含辅助函数和适当的 Java 驱动程序调用的完整示例:https://moschetti.org/rants/mongoaggcvt.html 但其要点是:

    private static class StageHelper {
        private StringBuilder txt;

        public StageHelper() {
            this.txt = new StringBuilder();
        }

        public void add(String expr, Object ... subs) {
            expr.replace("'", "\""); // This is the helpful part.                            
            if(subs.length > 0) {
                expr = String.format(expr, subs);  // this too                               
            }
            txt.append(expr);
        }
        public Document fetch() {
            Document b = Document.parse(txt.toString());
            return b;
        }
    }

    private List<Document> makePipeline() {
        List<Document> pipeline = new ArrayList<Document>();

        StageHelper s = new StageHelper();
        s.add("{$match: {$and: [ ");

        // Note use of EJSON here plus string substitution of dates:                         
        s.add("  {endTime:{$gte: {$date: '%s'}} }", "2021-01-01");
        s.add("  {endTime:{$lt: {$date: '%s'}} }", "2022-01-01");

        s.add("  {Task:'completed'} ");
        s.add("]} } ");
        pipeline.add(s.fetch());

        s = new StageHelper();
        s.add("{$group: {_id: { ");
        s.add("  p: {$arrayElemAt:[{$split:['$product_action','_']},2]}, ");
        s.add("  d: {$dateToString: {date: '$endTime', 'format': '%m-%Y'}} ");
        s.add("  }, ");
        s.add("  n: {$sum: 1}  ");
        s.add("}} ");
        pipeline.add(s.fetch());

        s = new StageHelper();
        s.add("{$sort: {'_id.d':1}} ");
        pipeline.add(s.fetch());

        s = new StageHelper();
        s.add("{$group: {_id: '$_id.p', ");
        s.add("        completed: {$push: { ");
        s.add("            d: {$concat: [ ");
        s.add("                {$arrayElemAt:[ ['JAN','FEB','MAR', ");
        s.add("                                 'APR','MAY','JUN', ");
        s.add("                                 'JUL','AUG','SEP', ");
        s.add("                                 'OCT','NOV','DEC'], ");
        s.add("                {$subtract:[{$toInt: {$substr:['$_id.d',0,2]}},1]} ");
        s.add("                      ]}, ");
        s.add("                '-', ");
        s.add("                {$substr:['$_id.d',3,4]} ");
        s.add("            ]}, ");
        s.add("            n: '$n'}} ");
        s.add("       }} ");
        pipeline.add(s.fetch());

    return pipeline;
    }
...
    import com.mongodb.client.MongoCursor;
    import com.mongodb.client.AggregateIterable;

    AggregateIterable<Document> output = coll.aggregate(pipeline);
    MongoCursor<Document> iterator = output.iterator();
    while (iterator.hasNext()) {
            Document doc = iterator.next();