按多个范围查询文档计数返回范围 start/end 与匹配元素计数

Query document count by multiple ranges returning range start/end with matching element count

我一直在尝试对这些文档创建查询:

// Sample documents: one tracking event per line (timestamp, objectId, locationId).
[
    { "timestamp": new ISODate('2020-01-01T00:00:00'), "objectId": "Id_A", "locationId": "Location_A" },
    { "timestamp": new ISODate('2021-01-01T00:00:00'), "objectId": "Id_A", "locationId": "Location_A" },
    { "timestamp": new ISODate('2022-01-01T00:00:00'), "objectId": "Id_A", "locationId": "Location_B" },
    { "timestamp": new ISODate('2021-01-01T00:00:00'), "objectId": "Id_B", "locationId": "Location_B" },
    { "timestamp": new ISODate('2022-01-01T00:00:00'), "objectId": "Id_A", "locationId": "Location_A" }
]

给定多个用 $or 组合起来的 “$and” 条件(每个条件对应一个范围),我想统计每个范围内匹配的文档数量:

// Conceptual filter: one $and clause per (objectId, locationId, time range),
// with the clauses OR-ed together. A filter is a document, so the outer
// wrapper must be { $or: [...] } -- a key cannot appear inside an array literal.
// Each range is half-open: $gte start, $lt end.
{$or: [
    { $and: [{
        "timestamp": {$gte: new ISODate('2020-01-01T00:00:00'),
                      $lt: new ISODate('2020-12-31T00:00:00')},
        "objectId": "Id_A",
        "locationId": "Location_A"}]},
    { $and: [{
        "timestamp": {$gte: new ISODate('2020-01-01T00:00:00'),
                      $lt: new ISODate('2022-12-31T00:00:00')},
        "objectId": "Id_A",
        "locationId": "Location_A"}]},
    { $and: [{
        "timestamp": {$gte: new ISODate('2022-01-01T00:00:00'),
                      $lt: new ISODate('2022-12-31T00:00:00')},
        "objectId": "Id_A",
        "locationId": "Location_B"}]}
]}

我想将计数映射到如下所示的结果结构

// Desired output: one row per requested (objectId, locationId, range) with the
// number of matching documents. The key is "rangeStart" (no trailing colon
// inside the key string), matching "rangeEnd".
[
    {"objectId": "Id_A", "locationId": "Location_A", "rangeStart": new ISODate('2020-01-01T00:00:00'), "rangeEnd": new ISODate('2020-12-31T00:00:00'), "count": 1},
    {"objectId": "Id_A", "locationId": "Location_A", "rangeStart": new ISODate('2020-01-01T00:00:00'), "rangeEnd": new ISODate('2022-12-31T00:00:00'), "count": 3},
    {"objectId": "Id_A", "locationId": "Location_B", "rangeStart": new ISODate('2022-01-01T00:00:00'), "rangeEnd": new ISODate('2022-12-31T00:00:00'), "count": 1}
]

到目前为止我查看的聚合:

可是我还是想不通。您会怎么做?

更新 1:我根据 @Takis 的建议得出的解决方案未能正确分配 rangeKey:

我得到以下结果:

{ 
    "_id" : {
        "objectId" : "objectA", 
        "locationId" : "locationA", 
        "rangeKey" : "UUID2", 
        "count" : 1.0
    }
}

虽然我希望得到以下结果:

{ 
    "_id" : {
        "objectId" : "objectA", 
        "locationId" : "locationA", 
        "rangeKey" : "UUID1",
        "count" : 1.0
    }
}, {
    
    "_id" : {
        "objectId" : "objectB", 
        "locationId" : "locationA", 
        "rangeKey" : "UUID2",
        "count" : 0.0
    }
}

这是我构建的查询

// Create the demo collection and load four tracking events
// (two objects, two locations), one document per line.
db.createCollection("object_location_tracking");
db.object_location_tracking.insertMany([
    { _id: "1", locationId: "locationA", objectId: "objectA", timestamp: ISODate("2020-01-01T00:00:00Z") },
    { _id: "2", locationId: "locationB", objectId: "objectA", timestamp: ISODate("2020-01-01T00:00:00Z") },
    { _id: "3", locationId: "locationA", objectId: "objectB", timestamp: ISODate("2019-01-01T00:00:00Z") },
    { _id: "4", locationId: "locationB", objectId: "objectB", timestamp: ISODate("2020-01-01T00:00:00Z") }
]);
// Count tracking events at locationA per (objectId, locationId, rangeKey).
//
// Corrections relative to the first attempt:
//   * A $switch "case" is an aggregation expression, so field tests must be
//     written as {$eq: ["$field", value]}. A query-style {"objectId": "objectB"}
//     is only an object literal there; it is always truthy, so the first
//     branch matched every in-range document regardless of its objectId.
//   * Each branch returns the key of its own range
//     (objectA -> "UUID1", objectB -> "UUID2"); the "UUDI2" typo is gone.
//   * "count" is a $group accumulator, so it is a sibling of "_id" rather
//     than a member of it.
//
// Note: $group only emits groups that contain at least one document, so a
// range without matches (objectB in the sample data) produces no row at all
// rather than a row with count 0.
db.getCollection("object_location_tracking").aggregate(
    [
        {
            // Keep only documents that can fall into one of the requested ranges.
            "$match": {
                "locationId": "locationA",
                "$or": [
                    {
                        "objectId": "objectA",
                        "timestamp": {
                            "$gte": ISODate("2020-01-01T00:00:00.000+0000"),
                            "$lt": ISODate("2022-01-01T00:00:00.000+0000")
                        }
                    },
                    {
                        "objectId": "objectB",
                        "timestamp": {
                            "$gte": ISODate("2020-01-01T00:00:00.000+0000"),
                            "$lt": ISODate("2022-01-01T00:00:00.000+0000")
                        }
                    }
                ]
            }
        },
        {
            "$group": {
                "_id": {
                    "objectId": "$objectId",
                    "locationId": "$locationId",
                    "rangeKey": {
                        "$switch": {
                            "branches": [
                                {
                                    "case": {
                                        "$and": [
                                            { "$gte": ["$timestamp", ISODate("2020-01-01T00:00:00.000+0000")] },
                                            { "$lt": ["$timestamp", ISODate("2022-01-01T00:00:00.000+0000")] },
                                            { "$eq": ["$objectId", "objectA"] },
                                            { "$eq": ["$locationId", "locationA"] }
                                        ]
                                    },
                                    "then": "UUID1"
                                },
                                {
                                    "case": {
                                        "$and": [
                                            { "$gte": ["$timestamp", ISODate("2020-01-01T00:00:00.000+0000")] },
                                            { "$lt": ["$timestamp", ISODate("2022-01-01T00:00:00.000+0000")] },
                                            { "$eq": ["$objectId", "objectB"] },
                                            { "$eq": ["$locationId", "locationA"] }
                                        ]
                                    },
                                    "then": "UUID2"
                                }
                            ],
                            // Documents that match no branch are grouped under "0".
                            "default": "0"
                        }
                    }
                },
                "count": { "$sum": 1.0 }
            }
        }
    ],
    {
        "allowDiskUse": true
    }
);

正如评论中所暗示的那样,$facet 可以解决问题。请注意,为简单起见,ISODate 使用了仅含日期(不带时间)的构造形式。$project 和 $unwind 并非绝对必要,它们只是为了把输出整理成 OP 想要的格式。$facet 只会产生一个文档,随后它会被转换为恰好三个离散的范围文档,因此这些后续阶段不会影响性能。

// One $facet sub-pipeline per requested (objectId, locationId, range); each
// sub-pipeline filters the collection and counts what is left. The $facet
// stage outputs a single document, which $project / $unwind / $replaceRoot
// reshape into one result document per range.
db.foo.aggregate([
    {$facet: {
        "first_bucket": [
            {$match: {"objectId":"Id_A",
                      "locationId":"Location_A",
                      "timestamp": {$gte: new ISODate('2020-01-01'),
                                    $lt: new ISODate('2020-12-31')}
                     }},
            {$count: "N"}
        ],

        "second_bucket": [
            {$match: {"objectId":"Id_A",
                      "locationId":"Location_A",
                      "timestamp": {$gte: new ISODate('2020-01-01'),
                                    $lt: new ISODate('2022-12-31')}
                     }},
            {$count: "N"}
        ],

        "third_bucket": [
            {$match: {"objectId":"Id_A",
                      "locationId":"Location_B",
                      "timestamp": {$gte: new ISODate('2022-01-01'),
                                    $lt: new ISODate('2022-12-31')}
                     }},
            {$count: "N"}
        ]
    }},
    // $count emits no document when nothing matches, leaving the bucket as an
    // empty array. $first of an empty array is missing, which would drop the
    // "count" field entirely, so $ifNull turns that case into an explicit 0.
    {$project: {X: [
        {"objectId":"Id_A",
         "locationId":"Location_A",
         "rangeStart": new ISODate('2020-01-01'),
         "rangeEnd": new ISODate('2020-12-31'),
         "count": {$ifNull: [{$first: '$first_bucket.N'}, 0]}
        },
        {"objectId":"Id_A",
         "locationId":"Location_A",
         "rangeStart": new ISODate('2020-01-01'),
         "rangeEnd": new ISODate('2022-12-31'),
         "count": {$ifNull: [{$first: '$second_bucket.N'}, 0]}
        },
        {"objectId":"Id_A",
         "locationId":"Location_B",
         "rangeStart": new ISODate('2022-01-01'),
         "rangeEnd": new ISODate('2022-12-31'),
         "count": {$ifNull: [{$first: '$third_bucket.N'}, 0]}
        }
    ]
    }},
    // Turn the three-element array X into three top-level documents.
    {$unwind: '$X'},
    {$replaceRoot: {newRoot: '$X'}}
]);

更新

$first 是 v>=4.4 上可用的运算符。要使此解决方案在 v<4.4 中有效,请将 $project 中的 count 表达式从 $first 更改为:

 "count": {$arrayElemAt:['$the_bucket.N',0]}

一个更有趣的变体是在 $facet 表达式中使用 $group。这将在存储桶中产生更多条目,但具有仅对日期范围进行硬编码的优点。

// Variant: each facet filters by date range only and then groups by
// (objectId, locationId), so only the date ranges are hard-coded.
db.foo.aggregate([
    {
        $facet: {
            "first_bucket": [
                {
                    $match: {
                        "timestamp": {
                            $gte: new ISODate('2020-01-01'),
                            $lt: new ISODate('2020-12-31')
                        }
                    }
                },
                {
                    $group: {
                        _id: {objectId: "$objectId", locationId: "$locationId"},
                        N: {$sum: 1}
                    }
                }
            ],
            "second_bucket": [
                {
                    $match: {
                        "timestamp": {
                            $gte: new ISODate('2020-01-01'),
                            $lt: new ISODate('2022-12-31')
                        }
                    }
                },
                {
                    $group: {
                        _id: {objectId: "$objectId", locationId: "$locationId"},
                        N: {$sum: 1}
                    }
                }
            ],
            "third_bucket": [
                {
                    $match: {
                        "timestamp": {
                            $gte: new ISODate('2022-01-01'),
                            $lt: new ISODate('2022-12-31')
                        }
                    }
                },
                {
                    $group: {
                        _id: {objectId: "$objectId", locationId: "$locationId"},
                        N: {$sum: 1}
                    }
                }
            ]
        }
    }
]);

查询

  • 您可以添加一个 $match 作为第一阶段以仅保留有效范围 (这也可以使用索引)
  • 按 objectId 和 locationId 以及条件范围分组
  • 我没有测试下面的查询,因为我没有样本数据;如果它不起作用而你又卡住了,请补充样本数据和预期输出

* 也可以使用 $facet,但 $facet 存在以下问题(请自行测试,看哪种方式更适合您的查询):

  • 不使用索引(即使匹配是第一阶段)
  • 会多次运行管道,每个 facet 字段各运行一次
// Group by objectId, locationId and the time range the document falls into,
// then count the documents in each group.
//
// The three branches previously tested the same 2020 range three times, so
// two of them could never be selected; they now cover the three ranges from
// the question. $switch returns the "then" of the FIRST branch whose case is
// true, so every document is counted in exactly one range. The branches are
// therefore ordered narrowest-first, and the wide 2020-2022 range only
// receives documents not already claimed by a narrower range. If overlapping
// ranges must each count the same document, use one $facet sub-pipeline per
// range instead.
aggregate(
[{"$group":
   {"_id":
     {"objectId":"$objectId",
      "locationId":"$locationId",
      "range":
       {"$switch":
         {"branches":
           [{"case":
               {"$and":
                 [{"$gte":["$timestamp", ISODate("2020-01-01T00:00:00Z")]},
                   {"$lt":["$timestamp", ISODate("2020-12-31T00:00:00Z")]}]},
              "then":
               {"rangeStart":ISODate("2020-01-01T00:00:00Z"),
                "rangeEnd":ISODate("2020-12-31T00:00:00Z")}},
             {"case":
               {"$and":
                 [{"$gte":["$timestamp", ISODate("2022-01-01T00:00:00Z")]},
                   {"$lt":["$timestamp", ISODate("2022-12-31T00:00:00Z")]}]},
              "then":
               {"rangeStart":ISODate("2022-01-01T00:00:00Z"),
                "rangeEnd":ISODate("2022-12-31T00:00:00Z")}},
             {"case":
               {"$and":
                 [{"$gte":["$timestamp", ISODate("2020-01-01T00:00:00Z")]},
                   {"$lt":["$timestamp", ISODate("2022-12-31T00:00:00Z")]}]},
              "then":
               {"rangeStart":ISODate("2020-01-01T00:00:00Z"),
                "rangeEnd":ISODate("2022-12-31T00:00:00Z")}}],
          // Documents outside every range are grouped under this marker.
          "default":"out-of-range"}}},
    "count":{"$sum":1}}},
 // Flatten the compound _id into top-level fields.
 {"$project":
   {"_id":0,
    "count":1,
    "objectId":"$_id.objectId",
    "locationId":"$_id.locationId",
    "range":"$_id.range"}}])