MongoDB 聚合优化(Spring Data)
Mongo DB aggregation optimization (Spring Data)
我的应用程序使用 Spring Boot (2.4.2),并以 MongoDB 作为数据库。我在一个集合中有约 110 万(1.1M)个文档,正在尝试对其进行一些聚合。我的文档结构如下所示:
在 java 代码中,我的聚合查询如下所示:
Aggregation aggregation = newAggregation(
match(where("gameRef")
.is(gameRef)),
group("platformRef", "gameRef", "currency")
.sum("bet")
.as("bet")
.sum("win")
.as("win")
.sum("data.bonusWin")
.as("bonus")
.count()
.as("count"),
project("platformRef", "gameRef", "currency")
.andInclude("bet")
.andInclude("win")
.andInclude("bonus")
.andInclude("count")
);
AggregationResults<SpinReport> results = mongoTemplate.aggregate(aggregation, SpinHistory.class, SpinReport.class);
return results.getMappedResults();
这段代码会生成如下的 MongoDB 聚合命令:
{
"aggregate": "__collection__",
"pipeline": [
{
"$match": {
"gameRef": "6047a10c58ed573e490b8f54"
}
},
{
"$group": {
"_id": {
"platformRef": "$platformRef",
"gameRef": "$gameRef",
"currency": "$currency"
},
"bet": {
"$sum": "$bet"
},
"win": {
"$sum": "$win"
},
"bonus": {
"$sum": "$data.bonusWin"
},
"count": {
"$sum": 1
}
}
},
{
"$project": {
"platformRef": "$_id.platformRef",
"gameRef": "$_id.gameRef",
"currency": "$_id.currency",
"bet": 1,
"win": 1,
"bonus": 1,
"count": 1
}
}
]
}
执行此查询需要 5 秒(在 1.1M 文档中)。请问有什么办法可以优化吗?
我在该集合中创建了这些索引:
我可以看到执行此查询时使用了 gameRef 字段上的索引,但性能上没有任何改善,仍然需要 5 秒。
是否有可能以某种方式使这项工作更快?
编辑:
通过对此查询运行 explain(查看执行计划):
db.spinHistory.explain().aggregate([
{
"$match": {
"gameRef": "6047a10c58ed573e490b8f54"
}
},
{
"$project": {
"platformRef": 1,
"gameRef": 1,
"currency": 1,
"win": 1,
"bet": 1,
"bonusWin": "$data.bonusWin",
"_id": 0
}
},
{
"$group": {
"_id": {
"platformRef": "$platformRef",
"gameRef": "$gameRef",
"currency": "$currency"
},
"bet": {
"$sum": "$bet"
},
"win": {
"$sum": "$win"
},
"bonus": {
"$sum": "$data.bonusWin"
},
"count": {
"$sum": 1
}
}
},
{
"$project": {
"platformRef": "$_id.platformRef",
"gameRef": "$_id.gameRef",
"currency": "$_id.currency",
"bet": 1,
"win": 1,
"bonus": 1,
"count": 1
}
}
])
按照 @Yahya 的建议,我可以看到如下结果:
{
"stages": [
{
"$cursor": {
"queryPlanner": {
"plannerVersion": 1,
"namespace": "oak9e_rgs_temp.spinHistory",
"indexFilterSet": false,
"parsedQuery": {
"gameRef": {
"$eq": "6047a10c58ed573e490b8f54"
}
},
"queryHash": "27C08187",
"planCacheKey": "E204EC8C",
"winningPlan": {
"stage": "PROJECTION_DEFAULT",
"transformBy": {
"bet": true,
"platformRef": true,
"win": true,
"currency": true,
"gameRef": true,
"bonusWin": "$data.bonusWin",
"_id": false
},
"inputStage": {
"stage": "IXSCAN",
"keyPattern": {
"gameRef": 1,
"platformRef": 1,
"currency": 1,
"bet": 1,
"win": 1,
"data.bonusWin": 1
},
"indexName": "idx_spin_history_main_fields",
"isMultiKey": false,
"multiKeyPaths": {
"gameRef": [],
"platformRef": [],
"currency": [],
"bet": [],
"win": [],
"data.bonusWin": []
},
"isUnique": false,
"isSparse": false,
"isPartial": false,
"indexVersion": 2,
"direction": "forward",
"indexBounds": {
"gameRef": [
"[\"6047a10c58ed573e490b8f54\", \"6047a10c58ed573e490b8f54\"]"
],
"platformRef": [
"[MinKey, MaxKey]"
],
"currency": [
"[MinKey, MaxKey]"
],
"bet": [
"[MinKey, MaxKey]"
],
"win": [
"[MinKey, MaxKey]"
],
"data.bonusWin": [
"[MinKey, MaxKey]"
]
}
}
},
"rejectedPlans": []
}
}
},
{
"$group": {
"_id": {
"platformRef": "$platformRef",
"gameRef": "$gameRef",
"currency": "$currency"
},
"bet": {
"$sum": "$bet"
},
"win": {
"$sum": "$win"
},
"bonus": {
"$sum": "$data.bonusWin"
},
"count": {
"$sum": {
"$const": 1
}
}
}
},
{
"$project": {
"_id": true,
"bet": true,
"bonus": true,
"count": true,
"win": true,
"platformRef": "$_id.platformRef",
"gameRef": "$_id.gameRef",
"currency": "$_id.currency"
}
}
],
"serverInfo": {
"host": "DESKTOP-V3NTFPM",
"port": 27017,
"version": "4.4.3",
"gitVersion": "913d6b62acfbb344dde1b116f4161360acd8fd13"
},
"ok": 1
}
这是我为此查询中使用的所有字段创建的索引:
{
"v": 2,
"key": {
"gameRef": 1,
"platformRef": 1,
"currency": 1,
"bet": 1,
"win": 1,
"data.bonusWin": 1
},
"name": "idx_spin_history_main_fields",
"background": false
}
执行统计数据:
{
"stages": [
{
"$cursor": {
"queryPlanner": {
"plannerVersion": 1,
"namespace": "oak9e_rgs_temp.spinHistory",
"indexFilterSet": false,
"parsedQuery": {
"gameRef": {
"$eq": "6047a10c58ed573e490b8f54"
}
},
"queryHash": "27C08187",
"planCacheKey": "E204EC8C",
"winningPlan": {
"stage": "PROJECTION_DEFAULT",
"transformBy": {
"gameRef": true,
"win": true,
"platformRef": true,
"bet": true,
"currency": true,
"bonusWin": "$data.bonusWin",
"_id": false
},
"inputStage": {
"stage": "IXSCAN",
"keyPattern": {
"gameRef": 1,
"platformRef": 1,
"currency": 1,
"bet": 1,
"win": 1,
"data.bonusWin": 1
},
"indexName": "idx_spin_history_main_fields",
"isMultiKey": false,
"multiKeyPaths": {
"gameRef": [],
"platformRef": [],
"currency": [],
"bet": [],
"win": [],
"data.bonusWin": []
},
"isUnique": false,
"isSparse": false,
"isPartial": false,
"indexVersion": 2,
"direction": "forward",
"indexBounds": {
"gameRef": [
"[\"6047a10c58ed573e490b8f54\", \"6047a10c58ed573e490b8f54\"]"
],
"platformRef": [
"[MinKey, MaxKey]"
],
"currency": [
"[MinKey, MaxKey]"
],
"bet": [
"[MinKey, MaxKey]"
],
"win": [
"[MinKey, MaxKey]"
],
"data.bonusWin": [
"[MinKey, MaxKey]"
]
}
}
},
"rejectedPlans": []
},
"executionStats": {
"executionSuccess": true,
"nReturned": 1145023,
"executionTimeMillis": 4473,
"totalKeysExamined": 1145023,
"totalDocsExamined": 0,
"executionStages": {
"stage": "PROJECTION_DEFAULT",
"nReturned": 1145023,
"executionTimeMillisEstimate": 623,
"works": 1145024,
"advanced": 1145023,
"needTime": 0,
"needYield": 0,
"saveState": 1295,
"restoreState": 1295,
"isEOF": 1,
"transformBy": {
"gameRef": true,
"win": true,
"platformRef": true,
"bet": true,
"currency": true,
"bonusWin": "$data.bonusWin",
"_id": false
},
"inputStage": {
"stage": "IXSCAN",
"nReturned": 1145023,
"executionTimeMillisEstimate": 161,
"works": 1145024,
"advanced": 1145023,
"needTime": 0,
"needYield": 0,
"saveState": 1295,
"restoreState": 1295,
"isEOF": 1,
"keyPattern": {
"gameRef": 1,
"platformRef": 1,
"currency": 1,
"bet": 1,
"win": 1,
"data.bonusWin": 1
},
"indexName": "idx_spin_history_main_fields",
"isMultiKey": false,
"multiKeyPaths": {
"gameRef": [],
"platformRef": [],
"currency": [],
"bet": [],
"win": [],
"data.bonusWin": []
},
"isUnique": false,
"isSparse": false,
"isPartial": false,
"indexVersion": 2,
"direction": "forward",
"indexBounds": {
"gameRef": [
"[\"6047a10c58ed573e490b8f54\", \"6047a10c58ed573e490b8f54\"]"
],
"platformRef": [
"[MinKey, MaxKey]"
],
"currency": [
"[MinKey, MaxKey]"
],
"bet": [
"[MinKey, MaxKey]"
],
"win": [
"[MinKey, MaxKey]"
],
"data.bonusWin": [
"[MinKey, MaxKey]"
]
},
"keysExamined": 1145023,
"seeks": 1,
"dupsTested": 0,
"dupsDropped": 0
}
}
}
},
"nReturned": NumberLong(1145023),
"executionTimeMillisEstimate": NumberLong(4074)
},
{
"$group": {
"_id": {
"platformRef": "$platformRef",
"gameRef": "$gameRef",
"currency": "$currency"
},
"bet": {
"$sum": "$bet"
},
"win": {
"$sum": "$win"
},
"bonus": {
"$sum": "$data.bonusWin"
},
"count": {
"$sum": {
"$const": 1
}
}
},
"nReturned": NumberLong(3),
"executionTimeMillisEstimate": NumberLong(4467)
},
{
"$project": {
"_id": true,
"bonus": true,
"count": true,
"win": true,
"bet": true,
"platformRef": "$_id.platformRef",
"gameRef": "$_id.gameRef",
"currency": "$_id.currency"
},
"nReturned": NumberLong(3),
"executionTimeMillisEstimate": NumberLong(4467)
}
],
"serverInfo": {
"host": "DESKTOP-V3NTFPM",
"port": 27017,
"version": "4.4.3",
"gitVersion": "913d6b62acfbb344dde1b116f4161360acd8fd13"
},
"ok": 1
}
如果索引的大小不是问题(听起来确实不是),那么请删除您为这个聚合管道新建的所有索引,然后创建以下索引:
{ gameRef: 1, platformRef: 1, currency: 1, bet: 1, win: 1, "data.bonusWin": 1 }
这实际上是在索引您需要的所有字段。
接下来你只需要投影。
所以,请在 $match 和 $group 阶段之间添加另一个投影($project)阶段,例如:
{ _id: 0, gameRef: 1, platformRef: 1, currency: 1, bet: 1, win: 1, "data.bonusWin": 1 }
这里的关键是在投影中排除 _id(即 _id: 0):_id 不在该索引中,只有排除它,查询才能完全由索引覆盖,而无需读取文档。
从编辑后的问题来看(totalDocsExamined 为 0,查询已完全由索引覆盖),这显然已经是该查询所能达到的最优执行计划,但耗时仍约为 4 秒。
可能的解决方案:
- 不要每次都运行聚合管道:使用 $merge 阶段创建物化视图,然后直接从该视图查询。
- 创建一个汇总(summary)集合:可以由应用程序代码自行维护,也可以借助 Change Streams 来维护(Change Streams 同样是在应用程序层面工作的)。
我的应用程序使用 Spring Boot (2.4.2),并以 MongoDB 作为数据库。我在一个集合中有约 110 万(1.1M)个文档,正在尝试对其进行一些聚合。我的文档结构如下所示:
在 java 代码中,我的聚合查询如下所示:
Aggregation aggregation = newAggregation(
match(where("gameRef")
.is(gameRef)),
group("platformRef", "gameRef", "currency")
.sum("bet")
.as("bet")
.sum("win")
.as("win")
.sum("data.bonusWin")
.as("bonus")
.count()
.as("count"),
project("platformRef", "gameRef", "currency")
.andInclude("bet")
.andInclude("win")
.andInclude("bonus")
.andInclude("count")
);
AggregationResults<SpinReport> results = mongoTemplate.aggregate(aggregation, SpinHistory.class, SpinReport.class);
return results.getMappedResults();
这段代码会生成如下的 MongoDB 聚合命令:
{
"aggregate": "__collection__",
"pipeline": [
{
"$match": {
"gameRef": "6047a10c58ed573e490b8f54"
}
},
{
"$group": {
"_id": {
"platformRef": "$platformRef",
"gameRef": "$gameRef",
"currency": "$currency"
},
"bet": {
"$sum": "$bet"
},
"win": {
"$sum": "$win"
},
"bonus": {
"$sum": "$data.bonusWin"
},
"count": {
"$sum": 1
}
}
},
{
"$project": {
"platformRef": "$_id.platformRef",
"gameRef": "$_id.gameRef",
"currency": "$_id.currency",
"bet": 1,
"win": 1,
"bonus": 1,
"count": 1
}
}
]
}
执行此查询需要 5 秒(在 1.1M 文档中)。请问有什么办法可以优化吗? 我在该集合中创建了这些索引:
我可以看到执行此查询时使用了 gameRef 字段上的索引,但性能上没有任何改善,仍然需要 5 秒。
是否有可能以某种方式使这项工作更快?
编辑:
通过对此查询运行 explain(查看执行计划):
db.spinHistory.explain().aggregate([
{
"$match": {
"gameRef": "6047a10c58ed573e490b8f54"
}
},
{
"$project": {
"platformRef": 1,
"gameRef": 1,
"currency": 1,
"win": 1,
"bet": 1,
"bonusWin": "$data.bonusWin",
"_id": 0
}
},
{
"$group": {
"_id": {
"platformRef": "$platformRef",
"gameRef": "$gameRef",
"currency": "$currency"
},
"bet": {
"$sum": "$bet"
},
"win": {
"$sum": "$win"
},
"bonus": {
"$sum": "$data.bonusWin"
},
"count": {
"$sum": 1
}
}
},
{
"$project": {
"platformRef": "$_id.platformRef",
"gameRef": "$_id.gameRef",
"currency": "$_id.currency",
"bet": 1,
"win": 1,
"bonus": 1,
"count": 1
}
}
])
按照 @Yahya 的建议,我可以看到如下结果:
{
"stages": [
{
"$cursor": {
"queryPlanner": {
"plannerVersion": 1,
"namespace": "oak9e_rgs_temp.spinHistory",
"indexFilterSet": false,
"parsedQuery": {
"gameRef": {
"$eq": "6047a10c58ed573e490b8f54"
}
},
"queryHash": "27C08187",
"planCacheKey": "E204EC8C",
"winningPlan": {
"stage": "PROJECTION_DEFAULT",
"transformBy": {
"bet": true,
"platformRef": true,
"win": true,
"currency": true,
"gameRef": true,
"bonusWin": "$data.bonusWin",
"_id": false
},
"inputStage": {
"stage": "IXSCAN",
"keyPattern": {
"gameRef": 1,
"platformRef": 1,
"currency": 1,
"bet": 1,
"win": 1,
"data.bonusWin": 1
},
"indexName": "idx_spin_history_main_fields",
"isMultiKey": false,
"multiKeyPaths": {
"gameRef": [],
"platformRef": [],
"currency": [],
"bet": [],
"win": [],
"data.bonusWin": []
},
"isUnique": false,
"isSparse": false,
"isPartial": false,
"indexVersion": 2,
"direction": "forward",
"indexBounds": {
"gameRef": [
"[\"6047a10c58ed573e490b8f54\", \"6047a10c58ed573e490b8f54\"]"
],
"platformRef": [
"[MinKey, MaxKey]"
],
"currency": [
"[MinKey, MaxKey]"
],
"bet": [
"[MinKey, MaxKey]"
],
"win": [
"[MinKey, MaxKey]"
],
"data.bonusWin": [
"[MinKey, MaxKey]"
]
}
}
},
"rejectedPlans": []
}
}
},
{
"$group": {
"_id": {
"platformRef": "$platformRef",
"gameRef": "$gameRef",
"currency": "$currency"
},
"bet": {
"$sum": "$bet"
},
"win": {
"$sum": "$win"
},
"bonus": {
"$sum": "$data.bonusWin"
},
"count": {
"$sum": {
"$const": 1
}
}
}
},
{
"$project": {
"_id": true,
"bet": true,
"bonus": true,
"count": true,
"win": true,
"platformRef": "$_id.platformRef",
"gameRef": "$_id.gameRef",
"currency": "$_id.currency"
}
}
],
"serverInfo": {
"host": "DESKTOP-V3NTFPM",
"port": 27017,
"version": "4.4.3",
"gitVersion": "913d6b62acfbb344dde1b116f4161360acd8fd13"
},
"ok": 1
}
这是我为此查询中使用的所有字段创建的索引:
{
"v": 2,
"key": {
"gameRef": 1,
"platformRef": 1,
"currency": 1,
"bet": 1,
"win": 1,
"data.bonusWin": 1
},
"name": "idx_spin_history_main_fields",
"background": false
}
执行统计数据:
{
"stages": [
{
"$cursor": {
"queryPlanner": {
"plannerVersion": 1,
"namespace": "oak9e_rgs_temp.spinHistory",
"indexFilterSet": false,
"parsedQuery": {
"gameRef": {
"$eq": "6047a10c58ed573e490b8f54"
}
},
"queryHash": "27C08187",
"planCacheKey": "E204EC8C",
"winningPlan": {
"stage": "PROJECTION_DEFAULT",
"transformBy": {
"gameRef": true,
"win": true,
"platformRef": true,
"bet": true,
"currency": true,
"bonusWin": "$data.bonusWin",
"_id": false
},
"inputStage": {
"stage": "IXSCAN",
"keyPattern": {
"gameRef": 1,
"platformRef": 1,
"currency": 1,
"bet": 1,
"win": 1,
"data.bonusWin": 1
},
"indexName": "idx_spin_history_main_fields",
"isMultiKey": false,
"multiKeyPaths": {
"gameRef": [],
"platformRef": [],
"currency": [],
"bet": [],
"win": [],
"data.bonusWin": []
},
"isUnique": false,
"isSparse": false,
"isPartial": false,
"indexVersion": 2,
"direction": "forward",
"indexBounds": {
"gameRef": [
"[\"6047a10c58ed573e490b8f54\", \"6047a10c58ed573e490b8f54\"]"
],
"platformRef": [
"[MinKey, MaxKey]"
],
"currency": [
"[MinKey, MaxKey]"
],
"bet": [
"[MinKey, MaxKey]"
],
"win": [
"[MinKey, MaxKey]"
],
"data.bonusWin": [
"[MinKey, MaxKey]"
]
}
}
},
"rejectedPlans": []
},
"executionStats": {
"executionSuccess": true,
"nReturned": 1145023,
"executionTimeMillis": 4473,
"totalKeysExamined": 1145023,
"totalDocsExamined": 0,
"executionStages": {
"stage": "PROJECTION_DEFAULT",
"nReturned": 1145023,
"executionTimeMillisEstimate": 623,
"works": 1145024,
"advanced": 1145023,
"needTime": 0,
"needYield": 0,
"saveState": 1295,
"restoreState": 1295,
"isEOF": 1,
"transformBy": {
"gameRef": true,
"win": true,
"platformRef": true,
"bet": true,
"currency": true,
"bonusWin": "$data.bonusWin",
"_id": false
},
"inputStage": {
"stage": "IXSCAN",
"nReturned": 1145023,
"executionTimeMillisEstimate": 161,
"works": 1145024,
"advanced": 1145023,
"needTime": 0,
"needYield": 0,
"saveState": 1295,
"restoreState": 1295,
"isEOF": 1,
"keyPattern": {
"gameRef": 1,
"platformRef": 1,
"currency": 1,
"bet": 1,
"win": 1,
"data.bonusWin": 1
},
"indexName": "idx_spin_history_main_fields",
"isMultiKey": false,
"multiKeyPaths": {
"gameRef": [],
"platformRef": [],
"currency": [],
"bet": [],
"win": [],
"data.bonusWin": []
},
"isUnique": false,
"isSparse": false,
"isPartial": false,
"indexVersion": 2,
"direction": "forward",
"indexBounds": {
"gameRef": [
"[\"6047a10c58ed573e490b8f54\", \"6047a10c58ed573e490b8f54\"]"
],
"platformRef": [
"[MinKey, MaxKey]"
],
"currency": [
"[MinKey, MaxKey]"
],
"bet": [
"[MinKey, MaxKey]"
],
"win": [
"[MinKey, MaxKey]"
],
"data.bonusWin": [
"[MinKey, MaxKey]"
]
},
"keysExamined": 1145023,
"seeks": 1,
"dupsTested": 0,
"dupsDropped": 0
}
}
}
},
"nReturned": NumberLong(1145023),
"executionTimeMillisEstimate": NumberLong(4074)
},
{
"$group": {
"_id": {
"platformRef": "$platformRef",
"gameRef": "$gameRef",
"currency": "$currency"
},
"bet": {
"$sum": "$bet"
},
"win": {
"$sum": "$win"
},
"bonus": {
"$sum": "$data.bonusWin"
},
"count": {
"$sum": {
"$const": 1
}
}
},
"nReturned": NumberLong(3),
"executionTimeMillisEstimate": NumberLong(4467)
},
{
"$project": {
"_id": true,
"bonus": true,
"count": true,
"win": true,
"bet": true,
"platformRef": "$_id.platformRef",
"gameRef": "$_id.gameRef",
"currency": "$_id.currency"
},
"nReturned": NumberLong(3),
"executionTimeMillisEstimate": NumberLong(4467)
}
],
"serverInfo": {
"host": "DESKTOP-V3NTFPM",
"port": 27017,
"version": "4.4.3",
"gitVersion": "913d6b62acfbb344dde1b116f4161360acd8fd13"
},
"ok": 1
}
如果索引的大小不是问题(听起来确实不是),那么请删除您为这个聚合管道新建的所有索引,然后创建以下索引:
{ gameRef: 1, platformRef: 1, currency: 1, bet: 1, win: 1, "data.bonusWin": 1 }
这实际上是在索引您需要的所有字段。
接下来你只需要投影。
所以,请在 $match 和 $group 阶段之间添加另一个投影($project)阶段,例如:
{ _id: 0, gameRef: 1, platformRef: 1, currency: 1, bet: 1, win: 1, "data.bonusWin": 1 }
这里的关键是在投影中排除 _id(即 _id: 0):_id 不在该索引中,只有排除它,查询才能完全由索引覆盖,而无需读取文档。
从编辑后的问题来看(totalDocsExamined 为 0,查询已完全由索引覆盖),这显然已经是该查询所能达到的最优执行计划,但耗时仍约为 4 秒。
可能的解决方案:
- 不要每次都运行聚合管道:使用 $merge 阶段创建物化视图,然后直接从该视图查询。
- 创建一个汇总(summary)集合:可以由应用程序代码自行维护,也可以借助 Change Streams 来维护(Change Streams 同样是在应用程序层面工作的)。