按多个时间范围查询文档数量,返回每个范围的起止时间(start/end)及匹配的文档数
Query document count by multiple ranges returning range start/end with matching element count
我一直在尝试对这些文档创建查询:
[
{
"timestamp": new ISODate('2020-01-01T00:00:00'),
"objectId": "Id_A",
"locationId": "Location_A"
},
{
"timestamp": new ISODate('2021-01-01T00:00:00'),
"objectId": "Id_A",
"locationId": "Location_A"
},
{
"timestamp": new ISODate('2022-01-01T00:00:00'),
"objectId": "Id_A",
"locationId": "Location_B"
},
{
"timestamp": new ISODate('2021-01-01T00:00:00'),
"objectId": "Id_B",
"locationId": "Location_B"
},
{
"timestamp": new ISODate('2022-01-01T00:00:00'),
"objectId": "Id_A",
"locationId": "Location_A"
}
]
给定多个由 $or 组合的 $and 条件,我想统计每个范围内匹配的文档数量:
[$or: [
{ $and: [{
"timestamp": {$gte: new ISODate('2020-01-01T00:00:00'),
$lt: new ISODate('2020-12-31T00:00:00'),
},
"objectId": "Id_A",
"locationId": "Location_A"}]},
{ $and: [{
"timestamp": {$gte: new ISODate('2020-01-01T00:00:00'),
$lt: new ISODate('2022-12-31T00:00:00'),
},
"objectId": "Id_A",
"locationId": "Location_A"}]},
{ $and: [{
"timestamp": {$gte: new ISODate('2022-01-01T00:00:00'),
$lt: new ISODate('2022-12-31T00:00:00'),
},
"objectId": "Id_A",
"locationId": "Location_B"}]}
]
]
我想将计数映射到如下所示的结果结构
[
{"objectId": "Id_A", "locationId": "Location_A", "rangeStart": new ISODate('2020-01-01T00:00:00'), "rangeEnd": new ISODate('2020-12-31T00:00:00'), "count": 1},
{"objectId": "Id_A", "locationId": "Location_A", "rangeStart": new ISODate('2020-01-01T00:00:00'), "rangeEnd": new ISODate('2022-12-31T00:00:00'), "count": 3},
{"objectId": "Id_A", "locationId": "Location_B", "rangeStart": new ISODate('2022-01-01T00:00:00'), "rangeEnd": new ISODate('2022-12-31T00:00:00'), "count": 1},
]
到目前为止我研究过的聚合阶段:
- $bucket
- $facet
- $group
可是我还是没有找到解法。
您会怎么做?
更新 1
我根据 @Takis 的建议写出的方案没有正确分配 rangeKey:
我得到以下结果:
{
"_id" : {
"objectId" : "objectA",
"locationId" : "locationA",
"rangeKey" : "UUID2",
"count" : 1.0
}
}
虽然我希望得到以下结果:
{
"_id" : {
"objectId" : "objectA",
"locationId" : "locationA",
"rangeKey" : "UUID1",
"count" : 1.0
}
}, {
"_id" : {
"objectId" : "objectB",
"locationId" : "locationA",
"rangeKey" : "UUID2",
"count" : 0.0
}
}
这是我构建的查询
// Seed the reproduction collection with four tracking records: two objects,
// each seen at two locations.
db.createCollection("object_location_tracking");
db.getCollection("object_location_tracking").insertMany([
    { _id: "1", locationId: "locationA", objectId: "objectA", timestamp: ISODate("2020-01-01T00:00:00Z") },
    { _id: "2", locationId: "locationB", objectId: "objectA", timestamp: ISODate("2020-01-01T00:00:00Z") },
    // The only record dated 2019.
    { _id: "3", locationId: "locationA", objectId: "objectB", timestamp: ISODate("2019-01-01T00:00:00Z") },
    { _id: "4", locationId: "locationB", objectId: "objectB", timestamp: ISODate("2020-01-01T00:00:00Z") }
]);
// Aggregation from the question: keep locationA documents for objectA or
// objectB whose timestamp lies in [2020-01-01, 2022-01-01), then group them by
// objectId, locationId and a rangeKey label chosen by $switch.
db.getCollection("object_location_tracking").aggregate(
[
{
"$match" : {
"locationId" : "locationA",
"$or" : [
{
"$and" : [
{
"objectId" : "objectA"
},
{
"timestamp" : {
"$gte" : ISODate("2020-01-01T00:00:00.000+0000")
}
},
{
"timestamp" : {
"$lt" : ISODate("2022-01-01T00:00:00.000+0000")
}
}
]
},
{
"$and" : [
{
"objectId" : "objectB"
},
{
"timestamp" : {
"$gte" : ISODate("2020-01-01T00:00:00.000+0000")
}
},
{
"timestamp" : {
"$lt" : ISODate("2022-01-01T00:00:00.000+0000")
}
}
]
}
]
}
},
{
"$group" : {
"_id" : {
"objectId" : "$objectId",
"locationId" : "$locationId",
"rangeKey" : {
"$switch" : {
"branches" : [
{
"case" : {
"$and" : [
{
"$gte" : [
"$timestamp",
ISODate("2020-01-01T00:00:00.000+0000")
]
},
{
"$lt" : [
"$timestamp",
ISODate("2022-01-01T00:00:00.000+0000")
]
},
// NOTE(review): the next two entries use query-predicate syntax inside an
// aggregation expression. They presumably evaluate as object literals (always
// truthy) instead of testing the fields, so this first branch would win for
// every in-range document. {"$eq": ["$objectId", "objectB"]} and
// {"$eq": ["$locationId", "locationA"]} are likely what was intended - confirm.
{
"objectId" : "objectB"
},
{
"locationId" : "locationA"
}
]
},
// NOTE(review): "UUDI2" looks like a typo; the expected output in the question
// uses "UUID1" / "UUID2" - confirm the intended label.
"then" : "UUDI2"
},
{
"case" : {
"$and" : [
{
"$gte" : [
"$timestamp",
ISODate("2020-01-01T00:00:00.000+0000")
]
},
{
"$lt" : [
"$timestamp",
ISODate("2022-01-01T00:00:00.000+0000")
]
},
// NOTE(review): same query-predicate-in-expression concern as in the first
// branch - confirm.
{
"objectId" : "objectA"
},
{
"locationId" : "locationA"
}
]
},
"then" : "UUID2"
}
],
"default" : "0"
}
},
// NOTE(review): "count" is still inside "_id" here (the "_id" object only
// closes after it), so it is part of the group key rather than an accumulator.
// That matches the "count" : 1.0 nested under "_id" in the result shown in the
// question. It probably belongs one level up, as a sibling of "_id" - confirm.
"count" : {
"$sum" : 1.0
}
}
}
}
],
{
"allowDiskUse" : true
}
);
正如评论中所提示的,$facet 可以解决这个问题。请注意,为简单起见,ISODate 使用了仅含日期(不含时间)的构造形式。$project 和 $unwind 并非绝对必要,它们只是为了把结果整理成 OP 期望的格式。$facet 阶段只输出一个文档,随后它会被展开成恰好三个独立的范围文档,因此这些后续阶段不会影响性能。
// Count documents for three explicit (objectId, locationId, time range)
// combinations and emit one result document per combination.
db.foo.aggregate([
    // Each facet is an independent sub-pipeline over the same input documents.
    // $count emits {N: <count>} - or no document at all when nothing matches.
    {$facet: {
        "first_bucket": [
            {$match: {"objectId":"Id_A",
                      "locationId":"Location_A",
                      "timestamp": {$gte: new ISODate('2020-01-01'),
                                    $lt: new ISODate('2020-12-31')}
                     }},
            {$count: "N"}
        ],
        "second_bucket": [
            {$match: {"objectId":"Id_A",
                      "locationId":"Location_A",
                      "timestamp": {$gte: new ISODate('2020-01-01'),
                                    $lt: new ISODate('2022-12-31')}
                     }},
            {$count: "N"}
        ],
        "third_bucket": [
            {$match: {"objectId":"Id_A",
                      "locationId":"Location_B",
                      "timestamp": {$gte: new ISODate('2022-01-01'),
                                    $lt: new ISODate('2022-12-31')}
                     }},
            {$count: "N"}
        ]
    }},
    // Reshape the single $facet document into an array of result rows.
    // A facet with no matches is an empty array, so $first yields no value;
    // $ifNull turns that into an explicit count of 0 instead of a missing field.
    {$project: {X: [
        {"objectId":"Id_A",
         "locationId":"Location_A",
         "rangeStart": new ISODate('2020-01-01'),
         "rangeEnd": new ISODate('2020-12-31'),
         "count": {$ifNull: [{$first: '$first_bucket.N'}, 0]}
        },
        {"objectId":"Id_A",
         "locationId":"Location_A",
         "rangeStart": new ISODate('2020-01-01'),
         "rangeEnd": new ISODate('2022-12-31'),
         "count": {$ifNull: [{$first: '$second_bucket.N'}, 0]}
        },
        {"objectId":"Id_A",
         "locationId":"Location_B",
         "rangeStart": new ISODate('2022-01-01'),
         "rangeEnd": new ISODate('2022-12-31'),
         "count": {$ifNull: [{$first: '$third_bucket.N'}, 0]}
        }
    ]
    }},
    // One output document per row, promoted to the top level.
    {$unwind: '$X'},
    {$replaceRoot: {newRoot: '$X'}}
]);
更新
$first
是 v>=4.4 上可用的运算符。要使此解决方案在 v<4.4 中有效,请将 $project
中的 count
表达式从 $first
更改为:
"count": {$arrayElemAt:['$the_bucket.N',0]}
一个更有趣的变体是在 $facet
表达式中使用 $group
。这将在存储桶中产生更多条目,但具有仅对日期范围进行硬编码的优点。
// Variant in which only the date ranges are hard-coded: every facet counts the
// documents inside its range per (objectId, locationId) pair.

// Builds the sub-pipeline for one facet: filter on [start, end), then count
// per (objectId, locationId).
function countsBetween(start, end) {
    return [
        {$match: {"timestamp": {$gte: start, $lt: end}}},
        {$group: {_id: {objectId: "$objectId", locationId: "$locationId"},
                  N: {$sum: 1}}}
    ];
}

db.foo.aggregate([
    {$facet: {
        "first_bucket": countsBetween(new ISODate('2020-01-01'), new ISODate('2020-12-31')),
        "second_bucket": countsBetween(new ISODate('2020-01-01'), new ISODate('2022-12-31')),
        "third_bucket": countsBetween(new ISODate('2022-01-01'), new ISODate('2022-12-31'))
    }}
]);
查询
- 您可以添加一个 $match 作为第一阶段,只保留落在有效范围内的文档(这样还可以利用索引)
- 按 objectId、locationId 以及按条件得出的范围进行分组
- 我没有测试下面的查询,因为我没有样本数据;如果它不起作用而您又卡住了,请尽量补充样本数据和预期输出
* 也可以使用 $facet,但 $facet 存在以下问题(请实际测试,看哪种方式更适合您的查询)
- 不使用索引(即使 $match 是第一阶段)
- 会多次运行管道,每个 facet 字段运行一次
// Counts documents per (objectId, locationId, range).
// Each document is tagged with EVERY listed range that contains its timestamp,
// so overlapping ranges are each counted. A $switch would stop at the first
// matching branch and could never count a document in two ranges.
aggregate(
[{"$addFields":
   {"matchedRange":
     {"$filter":
       {"input":
         [{"rangeStart":ISODate("2020-01-01T00:00:00Z"),
           "rangeEnd":ISODate("2020-12-31T00:00:00Z")},
          {"rangeStart":ISODate("2020-01-01T00:00:00Z"),
           "rangeEnd":ISODate("2022-12-31T00:00:00Z")},
          {"rangeStart":ISODate("2022-01-01T00:00:00Z"),
           "rangeEnd":ISODate("2022-12-31T00:00:00Z")}],
        "as":"r",
        // Half-open interval: rangeStart <= timestamp < rangeEnd.
        "cond":
         {"$and":
           [{"$gte":["$timestamp", "$$r.rangeStart"]},
            {"$lt":["$timestamp", "$$r.rangeEnd"]}]}}}}},
 // One copy of the document per matching range. Documents that match no range
 // are kept (without the field) so they can be reported as "out-of-range".
 {"$unwind":{"path":"$matchedRange", "preserveNullAndEmptyArrays":true}},
 {"$group":
   {"_id":
     {"objectId":"$objectId",
      "locationId":"$locationId",
      "range":{"$ifNull":["$matchedRange", "out-of-range"]}},
    "count":{"$sum":1}}},
 // Flatten the group key into top-level fields.
 {"$project":
   {"_id":0,
    "count":1,
    "objectId":"$_id.objectId",
    "locationId":"$_id.locationId",
    "range":"$_id.range"}}])
我一直在尝试对这些文档创建查询:
[
{
"timestamp": new ISODate('2020-01-01T00:00:00'),
"objectId": "Id_A",
"locationId": "Location_A"
},
{
"timestamp": new ISODate('2021-01-01T00:00:00'),
"objectId": "Id_A",
"locationId": "Location_A"
},
{
"timestamp": new ISODate('2022-01-01T00:00:00'),
"objectId": "Id_A",
"locationId": "Location_B"
},
{
"timestamp": new ISODate('2021-01-01T00:00:00'),
"objectId": "Id_B",
"locationId": "Location_B"
},
{
"timestamp": new ISODate('2022-01-01T00:00:00'),
"objectId": "Id_A",
"locationId": "Location_A"
}
]
给定多个由 $or 组合的 $and 条件,我想统计每个范围内匹配的文档数量:
[$or: [
{ $and: [{
"timestamp": {$gte: new ISODate('2020-01-01T00:00:00'),
$lt: new ISODate('2020-12-31T00:00:00'),
},
"objectId": "Id_A",
"locationId": "Location_A"}]},
{ $and: [{
"timestamp": {$gte: new ISODate('2020-01-01T00:00:00'),
$lt: new ISODate('2022-12-31T00:00:00'),
},
"objectId": "Id_A",
"locationId": "Location_A"}]},
{ $and: [{
"timestamp": {$gte: new ISODate('2022-01-01T00:00:00'),
$lt: new ISODate('2022-12-31T00:00:00'),
},
"objectId": "Id_A",
"locationId": "Location_B"}]}
]
]
我想将计数映射到如下所示的结果结构
[
{"objectId": "Id_A", "locationId": "Location_A", "rangeStart": new ISODate('2020-01-01T00:00:00'), "rangeEnd": new ISODate('2020-12-31T00:00:00'), "count": 1},
{"objectId": "Id_A", "locationId": "Location_A", "rangeStart": new ISODate('2020-01-01T00:00:00'), "rangeEnd": new ISODate('2022-12-31T00:00:00'), "count": 3},
{"objectId": "Id_A", "locationId": "Location_B", "rangeStart": new ISODate('2022-01-01T00:00:00'), "rangeEnd": new ISODate('2022-12-31T00:00:00'), "count": 1},
]
到目前为止我研究过的聚合阶段:
- $bucket
- $facet
- $group
可是我还是没有找到解法。您会怎么做?
更新 1:我根据 @Takis 的建议写出的方案没有正确分配 rangeKey:
我得到以下结果:
{
"_id" : {
"objectId" : "objectA",
"locationId" : "locationA",
"rangeKey" : "UUID2",
"count" : 1.0
}
}
虽然我希望得到以下结果:
{
"_id" : {
"objectId" : "objectA",
"locationId" : "locationA",
"rangeKey" : "UUID1",
"count" : 1.0
}
}, {
"_id" : {
"objectId" : "objectB",
"locationId" : "locationA",
"rangeKey" : "UUID2",
"count" : 0.0
}
}
这是我构建的查询
// Seed the reproduction collection with four tracking records: two objects,
// each seen at two locations.
db.createCollection("object_location_tracking");
db.getCollection("object_location_tracking").insertMany([
    { _id: "1", locationId: "locationA", objectId: "objectA", timestamp: ISODate("2020-01-01T00:00:00Z") },
    { _id: "2", locationId: "locationB", objectId: "objectA", timestamp: ISODate("2020-01-01T00:00:00Z") },
    // The only record dated 2019.
    { _id: "3", locationId: "locationA", objectId: "objectB", timestamp: ISODate("2019-01-01T00:00:00Z") },
    { _id: "4", locationId: "locationB", objectId: "objectB", timestamp: ISODate("2020-01-01T00:00:00Z") }
]);
// Aggregation from the question: keep locationA documents for objectA or
// objectB whose timestamp lies in [2020-01-01, 2022-01-01), then group them by
// objectId, locationId and a rangeKey label chosen by $switch.
db.getCollection("object_location_tracking").aggregate(
[
{
"$match" : {
"locationId" : "locationA",
"$or" : [
{
"$and" : [
{
"objectId" : "objectA"
},
{
"timestamp" : {
"$gte" : ISODate("2020-01-01T00:00:00.000+0000")
}
},
{
"timestamp" : {
"$lt" : ISODate("2022-01-01T00:00:00.000+0000")
}
}
]
},
{
"$and" : [
{
"objectId" : "objectB"
},
{
"timestamp" : {
"$gte" : ISODate("2020-01-01T00:00:00.000+0000")
}
},
{
"timestamp" : {
"$lt" : ISODate("2022-01-01T00:00:00.000+0000")
}
}
]
}
]
}
},
{
"$group" : {
"_id" : {
"objectId" : "$objectId",
"locationId" : "$locationId",
"rangeKey" : {
"$switch" : {
"branches" : [
{
"case" : {
"$and" : [
{
"$gte" : [
"$timestamp",
ISODate("2020-01-01T00:00:00.000+0000")
]
},
{
"$lt" : [
"$timestamp",
ISODate("2022-01-01T00:00:00.000+0000")
]
},
// NOTE(review): the next two entries use query-predicate syntax inside an
// aggregation expression. They presumably evaluate as object literals (always
// truthy) instead of testing the fields, so this first branch would win for
// every in-range document. {"$eq": ["$objectId", "objectB"]} and
// {"$eq": ["$locationId", "locationA"]} are likely what was intended - confirm.
{
"objectId" : "objectB"
},
{
"locationId" : "locationA"
}
]
},
// NOTE(review): "UUDI2" looks like a typo; the expected output in the question
// uses "UUID1" / "UUID2" - confirm the intended label.
"then" : "UUDI2"
},
{
"case" : {
"$and" : [
{
"$gte" : [
"$timestamp",
ISODate("2020-01-01T00:00:00.000+0000")
]
},
{
"$lt" : [
"$timestamp",
ISODate("2022-01-01T00:00:00.000+0000")
]
},
// NOTE(review): same query-predicate-in-expression concern as in the first
// branch - confirm.
{
"objectId" : "objectA"
},
{
"locationId" : "locationA"
}
]
},
"then" : "UUID2"
}
],
"default" : "0"
}
},
// NOTE(review): "count" is still inside "_id" here (the "_id" object only
// closes after it), so it is part of the group key rather than an accumulator.
// That matches the "count" : 1.0 nested under "_id" in the result shown in the
// question. It probably belongs one level up, as a sibling of "_id" - confirm.
"count" : {
"$sum" : 1.0
}
}
}
}
],
{
"allowDiskUse" : true
}
);
正如评论中所提示的,$facet 可以解决这个问题。请注意,为简单起见,ISODate 使用了仅含日期(不含时间)的构造形式。$project 和 $unwind 并非绝对必要,它们只是为了把结果整理成 OP 期望的格式。$facet 阶段只输出一个文档,随后它会被展开成恰好三个独立的范围文档,因此这些后续阶段不会影响性能。
// Count documents for three explicit (objectId, locationId, time range)
// combinations and emit one result document per combination.
db.foo.aggregate([
    // Each facet is an independent sub-pipeline over the same input documents.
    // $count emits {N: <count>} - or no document at all when nothing matches.
    {$facet: {
        "first_bucket": [
            {$match: {"objectId":"Id_A",
                      "locationId":"Location_A",
                      "timestamp": {$gte: new ISODate('2020-01-01'),
                                    $lt: new ISODate('2020-12-31')}
                     }},
            {$count: "N"}
        ],
        "second_bucket": [
            {$match: {"objectId":"Id_A",
                      "locationId":"Location_A",
                      "timestamp": {$gte: new ISODate('2020-01-01'),
                                    $lt: new ISODate('2022-12-31')}
                     }},
            {$count: "N"}
        ],
        "third_bucket": [
            {$match: {"objectId":"Id_A",
                      "locationId":"Location_B",
                      "timestamp": {$gte: new ISODate('2022-01-01'),
                                    $lt: new ISODate('2022-12-31')}
                     }},
            {$count: "N"}
        ]
    }},
    // Reshape the single $facet document into an array of result rows.
    // A facet with no matches is an empty array, so $first yields no value;
    // $ifNull turns that into an explicit count of 0 instead of a missing field.
    {$project: {X: [
        {"objectId":"Id_A",
         "locationId":"Location_A",
         "rangeStart": new ISODate('2020-01-01'),
         "rangeEnd": new ISODate('2020-12-31'),
         "count": {$ifNull: [{$first: '$first_bucket.N'}, 0]}
        },
        {"objectId":"Id_A",
         "locationId":"Location_A",
         "rangeStart": new ISODate('2020-01-01'),
         "rangeEnd": new ISODate('2022-12-31'),
         "count": {$ifNull: [{$first: '$second_bucket.N'}, 0]}
        },
        {"objectId":"Id_A",
         "locationId":"Location_B",
         "rangeStart": new ISODate('2022-01-01'),
         "rangeEnd": new ISODate('2022-12-31'),
         "count": {$ifNull: [{$first: '$third_bucket.N'}, 0]}
        }
    ]
    }},
    // One output document per row, promoted to the top level.
    {$unwind: '$X'},
    {$replaceRoot: {newRoot: '$X'}}
]);
更新
$first
是 v>=4.4 上可用的运算符。要使此解决方案在 v<4.4 中有效,请将 $project
中的 count
表达式从 $first
更改为:
"count": {$arrayElemAt:['$the_bucket.N',0]}
一个更有趣的变体是在 $facet
表达式中使用 $group
。这将在存储桶中产生更多条目,但具有仅对日期范围进行硬编码的优点。
// Variant in which only the date ranges are hard-coded: every facet counts the
// documents inside its range per (objectId, locationId) pair.

// Builds the sub-pipeline for one facet: filter on [start, end), then count
// per (objectId, locationId).
function countsBetween(start, end) {
    return [
        {$match: {"timestamp": {$gte: start, $lt: end}}},
        {$group: {_id: {objectId: "$objectId", locationId: "$locationId"},
                  N: {$sum: 1}}}
    ];
}

db.foo.aggregate([
    {$facet: {
        "first_bucket": countsBetween(new ISODate('2020-01-01'), new ISODate('2020-12-31')),
        "second_bucket": countsBetween(new ISODate('2020-01-01'), new ISODate('2022-12-31')),
        "third_bucket": countsBetween(new ISODate('2022-01-01'), new ISODate('2022-12-31'))
    }}
]);
查询
- 您可以添加一个 $match 作为第一阶段,只保留落在有效范围内的文档(这样还可以利用索引)
- 按 objectId、locationId 以及按条件得出的范围进行分组
- 我没有测试下面的查询,因为我没有样本数据;如果它不起作用而您又卡住了,请尽量补充样本数据和预期输出
* 也可以使用 $facet,但 $facet 存在以下问题(请实际测试,看哪种方式更适合您的查询)
- 不使用索引(即使 $match 是第一阶段)
- 会多次运行管道,每个 facet 字段运行一次
// Counts documents per (objectId, locationId, range).
// Each document is tagged with EVERY listed range that contains its timestamp,
// so overlapping ranges are each counted. A $switch would stop at the first
// matching branch and could never count a document in two ranges.
aggregate(
[{"$addFields":
   {"matchedRange":
     {"$filter":
       {"input":
         [{"rangeStart":ISODate("2020-01-01T00:00:00Z"),
           "rangeEnd":ISODate("2020-12-31T00:00:00Z")},
          {"rangeStart":ISODate("2020-01-01T00:00:00Z"),
           "rangeEnd":ISODate("2022-12-31T00:00:00Z")},
          {"rangeStart":ISODate("2022-01-01T00:00:00Z"),
           "rangeEnd":ISODate("2022-12-31T00:00:00Z")}],
        "as":"r",
        // Half-open interval: rangeStart <= timestamp < rangeEnd.
        "cond":
         {"$and":
           [{"$gte":["$timestamp", "$$r.rangeStart"]},
            {"$lt":["$timestamp", "$$r.rangeEnd"]}]}}}}},
 // One copy of the document per matching range. Documents that match no range
 // are kept (without the field) so they can be reported as "out-of-range".
 {"$unwind":{"path":"$matchedRange", "preserveNullAndEmptyArrays":true}},
 {"$group":
   {"_id":
     {"objectId":"$objectId",
      "locationId":"$locationId",
      "range":{"$ifNull":["$matchedRange", "out-of-range"]}},
    "count":{"$sum":1}}},
 // Flatten the group key into top-level fields.
 {"$project":
   {"_id":0,
    "count":1,
    "objectId":"$_id.objectId",
    "locationId":"$_id.locationId",
    "range":"$_id.range"}}])