MongoDB 数组的性能问题
MongoDB Performance Issue with arrays
我正在寻找有关如何提高查询性能的建议。
我在 Mongoose 中有这个用户模型,并且已经为 interested_cities 建立了索引。
// Sub-document schema for one push-notification token entry.
// All three fields are required strings with the `trim` option enabled.
const firebaseToken = new Schema({
  // Falls back to "android" when no value is supplied.
  type: { type: String, trim: true, required: true, default: "android" },
  device_id: { type: String, trim: true, required: true },
  fcm_token: { type: String, trim: true, required: true },
});
// Schema for the user model: a required name, an optional list of city
// strings, and an optional list of embedded firebaseToken sub-documents.
const userSchema = new Schema({
  name: { type: String, trim: true, required: true },
  // Each entry carries the `trim` and `lowercase` string options.
  // NOTE(review): Mongoose treats `unique` as an index option, not a
  // validator, so it does not deduplicate entries inside one user's array —
  // confirm this is what was intended.
  interested_cities: {
    type: [{ type: String, trim: true, lowercase: true, unique: true }],
    default: [],
    required: false,
  },
  push_notification_tokens_firebase: {
    type: [firebaseToken],
    default: [],
    required: false,
  },
});

// Ascending index on the array field (reported as multikey by explain()).
userSchema.index({ interested_cities: 1 });
我想查询 interested_cities 数组中包含 'A' 或 'B' 的用户。
我的查询类似于下面这样。我只需要查询结果中的 Firebase fcm_token。
// Select users matching either branch of the $or: interested_cities contains
// 'A' or 'B', or phone_number is one of adminPhoneNumbersList.
// The projection keeps only _id and the nested fcm_token values.
const involvedUsers = await User.find(
  {
    $or: [
      {
        interested_cities: { $in: ['A', 'B'] },
      },
      {
        phone_number: { $in: adminPhoneNumbersList },
      },
    ],
  },
  { _id: 1, "push_notification_tokens_firebase.fcm_token": 1 }
);
目前,查询 14k 文档需要 20 秒,这需要改进。
任何建议都将不胜感激。
explain 输出:
{
"explainVersion": "1",
"queryPlanner": {
"namespace": "production.users",
"indexFilterSet": false,
"parsedQuery": {
"interested_cities": {
"$in": [
"A",
"B"
]
}
},
"maxIndexedOrSolutionsReached": false,
"maxIndexedAndSolutionsReached": false,
"maxScansToExplodeReached": false,
"winningPlan": {
"stage": "PROJECTION_DEFAULT",
"transformBy": {
"_id": 1,
"push_notification_tokens_firebase.fcm_token": 1
},
"inputStage": {
"stage": "FETCH",
"inputStage": {
"stage": "IXSCAN",
"keyPattern": {
"interested_cities": 1
},
"indexName": "interested_cities_1",
"isMultiKey": true,
"multiKeyPaths": {
"interested_cities": [
"interested_cities"
]
},
"isUnique": false,
"isSparse": false,
"isPartial": false,
"indexVersion": 2,
"direction": "forward",
"indexBounds": {
"interested_cities": [
"[\"A\", \"A\"]",
"[\"B\", \"B\"]"
]
}
}
}
},
"rejectedPlans": []
},
"executionStats": {
"executionSuccess": true,
"nReturned": 6497,
"executionTimeMillis": 48,
"totalKeysExamined": 0,
"totalDocsExamined": 14827,
"executionStages": {
"stage": "SUBPLAN",
"nReturned": 6497,
"executionTimeMillisEstimate": 46,
"works": 14829,
"advanced": 6497,
"needTime": 8331,
"needYield": 0,
"saveState": 14,
"restoreState": 14,
"isEOF": 1,
"inputStage": {
"stage": "PROJECTION_DEFAULT",
"nReturned": 6497,
"executionTimeMillisEstimate": 46,
"works": 14829,
"advanced": 6497,
"needTime": 8331,
"needYield": 0,
"saveState": 14,
"restoreState": 14,
"isEOF": 1,
"transformBy": {
"_id": 1,
"push_notification_tokens_firebase.fcm_token": 1
},
"inputStage": {
"stage": "COLLSCAN",
"filter": {
"$or": [
{
"interested_cities": {
"$in": [
"A",
"B"
]
}
},
{
"phone_number": {
"$in": [
"phone numbers",
"phone number"
]
}
}
]
},
"nReturned": 6497,
"executionTimeMillisEstimate": 41,
"works": 14829,
"advanced": 6497,
"needTime": 8331,
"needYield": 0,
"saveState": 14,
"restoreState": 14,
"isEOF": 1,
"direction": "forward",
"docsExamined": 14827
}
}
},
"allPlansExecution": []
}
}
Mongoose 优化:
By default, Mongoose queries return an instance of the Mongoose Document class. Documents are much heavier than vanilla JavaScript objects, because they have a lot of internal state for change tracking. Enabling the lean option tells Mongoose to skip instantiating a full Mongoose document and just give you the POJO.
https://mongoosejs.com/docs/tutorials/lean.html#using-lean
您可以在单个查询的末尾附加 .lean() 来针对该查询禁用此行为。如果您的查询会返回“大量”文档,这确实可以提高速度。您应该通过上面的链接阅读更多关于 lean() 的信息。
查询优化:
When evaluating the clauses in the $or expression, MongoDB either performs a collection scan or, if all the clauses are supported by indexes, MongoDB performs index scans. That is, for MongoDB to use indexes to evaluate an $or expression, all the clauses in the $or expression must be supported by indexes. Otherwise, MongoDB will perform a collection scan.
https://www.mongodb.com/docs/manual/reference/operator/query/or/#-or-clauses-and-indexes
您共享的查询如下所示:
// Same $or query, with the city and phone lists passed in as citiesArr and
// phonesArr. Only _id and the nested fcm_token values are projected.
const involvedUsers = await User.find(
  {
    $or: [
      {
        interested_cities: { $in: citiesArr },
      },
      {
        phone_number: { $in: phonesArr },
      },
    ],
  },
  {
    _id: 1,
    "push_notification_tokens_firebase.fcm_token": 1,
  }
);
根据以上信息,您需要创建以下两个索引:
// One ascending single-field index for each field used in the $or clauses.
userSchema.index({
  interested_cities: 1,
});
userSchema.index({
  phone_number: 1,
});
这样,MongoDB 就能够“知道”哪些文档是相关的,在磁盘上找到它们,提取您的投影(“_id” 和 “push_notification_tokens_firebase.fcm_token”)并将其返回。
更进一步的优化是创建以下索引来代替上面的索引:
// Compound indexes that append the projected fields to each filter field.
// NOTE(review): the schema declares both interested_cities and
// push_notification_tokens_firebase as arrays. MongoDB's multikey-index rules
// allow at most one array-valued field per document in a compound index, and
// a multikey index cannot cover a projection of an array field — verify that
// these indexes can be built and that totalDocsExamined really reaches 0.
userSchema.index({
  interested_cities: 1,
  _id: 1,
  "push_notification_tokens_firebase.fcm_token": 1,
});
userSchema.index({
  phone_number: 1,
  _id: 1,
  "push_notification_tokens_firebase.fcm_token": 1,
});
这样,mongo 将从索引中获得完成查询所需的所有信息,这意味着它永远不会访问磁盘来获取文档。
您可以通过运行 <your-query>.explain('executionStats') 并确认 totalDocsExamined 为 0 来验证这一点。
在此处阅读有关 executionStats 的更多信息:
https://www.mongodb.com/docs/manual/reference/explain-results/#mongodb-data-explain.executionStats
希望对您有所帮助!
我正在寻找有关如何提高查询性能的建议。
我在 Mongoose 中有这个用户模型,并且已经为 interested_cities 建立了索引。
// Sub-document schema for one push-notification token entry.
// All three fields are required strings with the `trim` option enabled.
const firebaseToken = new Schema({
  // Falls back to "android" when no value is supplied.
  type: { type: String, trim: true, required: true, default: "android" },
  device_id: { type: String, trim: true, required: true },
  fcm_token: { type: String, trim: true, required: true },
});
// Schema for the user model: a required name, an optional list of city
// strings, and an optional list of embedded firebaseToken sub-documents.
const userSchema = new Schema({
  name: { type: String, trim: true, required: true },
  // Each entry carries the `trim` and `lowercase` string options.
  // NOTE(review): Mongoose treats `unique` as an index option, not a
  // validator, so it does not deduplicate entries inside one user's array —
  // confirm this is what was intended.
  interested_cities: {
    type: [{ type: String, trim: true, lowercase: true, unique: true }],
    default: [],
    required: false,
  },
  push_notification_tokens_firebase: {
    type: [firebaseToken],
    default: [],
    required: false,
  },
});

// Ascending index on the array field (reported as multikey by explain()).
userSchema.index({ interested_cities: 1 });
我想查询 interested_cities 数组中包含 'A' 或 'B' 的用户。
我的查询类似于下面这样。我只需要查询结果中的 Firebase fcm_token。
// Select users matching either branch of the $or: interested_cities contains
// 'A' or 'B', or phone_number is one of adminPhoneNumbersList.
// The projection keeps only _id and the nested fcm_token values.
const involvedUsers = await User.find(
  {
    $or: [
      {
        interested_cities: { $in: ['A', 'B'] },
      },
      {
        phone_number: { $in: adminPhoneNumbersList },
      },
    ],
  },
  { _id: 1, "push_notification_tokens_firebase.fcm_token": 1 }
);
目前,查询 14k 文档需要 20 秒,这需要改进。任何建议都将不胜感激。
explain 输出:
{
"explainVersion": "1",
"queryPlanner": {
"namespace": "production.users",
"indexFilterSet": false,
"parsedQuery": {
"interested_cities": {
"$in": [
"A",
"B"
]
}
},
"maxIndexedOrSolutionsReached": false,
"maxIndexedAndSolutionsReached": false,
"maxScansToExplodeReached": false,
"winningPlan": {
"stage": "PROJECTION_DEFAULT",
"transformBy": {
"_id": 1,
"push_notification_tokens_firebase.fcm_token": 1
},
"inputStage": {
"stage": "FETCH",
"inputStage": {
"stage": "IXSCAN",
"keyPattern": {
"interested_cities": 1
},
"indexName": "interested_cities_1",
"isMultiKey": true,
"multiKeyPaths": {
"interested_cities": [
"interested_cities"
]
},
"isUnique": false,
"isSparse": false,
"isPartial": false,
"indexVersion": 2,
"direction": "forward",
"indexBounds": {
"interested_cities": [
"[\"A\", \"A\"]",
"[\"B\", \"B\"]"
]
}
}
}
},
"rejectedPlans": []
},
"executionStats": {
"executionSuccess": true,
"nReturned": 6497,
"executionTimeMillis": 48,
"totalKeysExamined": 0,
"totalDocsExamined": 14827,
"executionStages": {
"stage": "SUBPLAN",
"nReturned": 6497,
"executionTimeMillisEstimate": 46,
"works": 14829,
"advanced": 6497,
"needTime": 8331,
"needYield": 0,
"saveState": 14,
"restoreState": 14,
"isEOF": 1,
"inputStage": {
"stage": "PROJECTION_DEFAULT",
"nReturned": 6497,
"executionTimeMillisEstimate": 46,
"works": 14829,
"advanced": 6497,
"needTime": 8331,
"needYield": 0,
"saveState": 14,
"restoreState": 14,
"isEOF": 1,
"transformBy": {
"_id": 1,
"push_notification_tokens_firebase.fcm_token": 1
},
"inputStage": {
"stage": "COLLSCAN",
"filter": {
"$or": [
{
"interested_cities": {
"$in": [
"A",
"B"
]
}
},
{
"phone_number": {
"$in": [
"phone numbers",
"phone number"
]
}
}
]
},
"nReturned": 6497,
"executionTimeMillisEstimate": 41,
"works": 14829,
"advanced": 6497,
"needTime": 8331,
"needYield": 0,
"saveState": 14,
"restoreState": 14,
"isEOF": 1,
"direction": "forward",
"docsExamined": 14827
}
}
},
"allPlansExecution": []
}
}
Mongoose 优化:
By default, Mongoose queries return an instance of the Mongoose Document class. Documents are much heavier than vanilla JavaScript objects, because they have a lot of internal state for change tracking. Enabling the lean option tells Mongoose to skip instantiating a full Mongoose document and just give you the POJO.
https://mongoosejs.com/docs/tutorials/lean.html#using-lean
您可以在单个查询的末尾附加 .lean() 来针对该查询禁用此行为。如果您的查询会返回“大量”文档,这确实可以提高速度。您应该通过上面的链接阅读更多关于 lean() 的信息。
查询优化:
When evaluating the clauses in the $or expression, MongoDB either performs a collection scan or, if all the clauses are supported by indexes, MongoDB performs index scans. That is, for MongoDB to use indexes to evaluate an $or expression, all the clauses in the $or expression must be supported by indexes. Otherwise, MongoDB will perform a collection scan.
https://www.mongodb.com/docs/manual/reference/operator/query/or/#-or-clauses-and-indexes
您共享的查询如下所示:
// Same $or query, with the city and phone lists passed in as citiesArr and
// phonesArr. Only _id and the nested fcm_token values are projected.
const involvedUsers = await User.find(
  {
    $or: [
      {
        interested_cities: { $in: citiesArr },
      },
      {
        phone_number: { $in: phonesArr },
      },
    ],
  },
  {
    _id: 1,
    "push_notification_tokens_firebase.fcm_token": 1,
  }
);
根据以上信息,您需要创建以下两个索引:
// One ascending single-field index for each field used in the $or clauses.
userSchema.index({
  interested_cities: 1,
});
userSchema.index({
  phone_number: 1,
});
这样,MongoDB 就能够“知道”哪些文档是相关的,在磁盘上找到它们,提取您的投影(“_id” 和 “push_notification_tokens_firebase.fcm_token”)并将其返回。
更进一步的优化是创建以下索引来代替上面的索引:
// Compound indexes that append the projected fields to each filter field.
// NOTE(review): the schema declares both interested_cities and
// push_notification_tokens_firebase as arrays. MongoDB's multikey-index rules
// allow at most one array-valued field per document in a compound index, and
// a multikey index cannot cover a projection of an array field — verify that
// these indexes can be built and that totalDocsExamined really reaches 0.
userSchema.index({
  interested_cities: 1,
  _id: 1,
  "push_notification_tokens_firebase.fcm_token": 1,
});
userSchema.index({
  phone_number: 1,
  _id: 1,
  "push_notification_tokens_firebase.fcm_token": 1,
});
这样,mongo 将从索引中获得完成查询所需的所有信息,这意味着它永远不会访问磁盘来获取文档。
您可以通过运行 <your-query>.explain('executionStats') 并确认 totalDocsExamined 为 0 来验证这一点。
在此处阅读有关 executionStats 的更多信息: https://www.mongodb.com/docs/manual/reference/explain-results/#mongodb-data-explain.executionStats
希望对您有所帮助!