优化 mongoid 查找查询性能

optimize mongoid find query performance

我有一个 mongo 集合,其中存储了 user_id(外键)和 address_id(外键)。该集合目前拥有超过 500 万条记录。我有 3 个分片,集合按如下方式分片:

db.adminCommand({shardCollection: "my_db.user_addresses", key: { user_id: 1, address_id: 1}})

查询的 explain 输出如下:

pp UserAddress.where(id: '5ace54343b816c0cdf4b2aa9').explain
{"queryPlanner"=>
  {"mongosPlannerVersion"=>1,
   "winningPlan"=>
    {"stage"=>"SHARD_MERGE",
     "shards"=>
      [{"shardName"=>"ShardOne",
        "connectionString"=>
         "ShardOne/ip-xxx-xx-0-111:17018,ip-xxx-xx-9-99:17017",
        "serverInfo"=>
         {"host"=>"ip-xxx-xx-9-99",
          "port"=>17017,
          "version"=>"3.6.3",
          "gitVersion"=>"9586e557d54ef70f9ca4b43c26892cd55257e1a5"},
        "plannerVersion"=>1,
        "namespace"=>"my_db.user_addresses",
        "indexFilterSet"=>false,
        "parsedQuery"=>
         {"_id"=>{"$eq"=>BSON::ObjectId('5ace54343b816c0cdf4b2aa9')}},
        "winningPlan"=>
         {"stage"=>"SHARDING_FILTER", "inputStage"=>{"stage"=>"IDHACK"}},
        "rejectedPlans"=>[]},
       {"shardName"=>"ShardTwo",
        "connectionString"=>
         "ShardTwo/ip-xxx-xx-9-222:11018,ip-xxx-xx-9-66:11017",
        "serverInfo"=>
         {"host"=>"ip-xxx-xx-9-66",
          "port"=>11017,
          "version"=>"3.6.3",
          "gitVersion"=>"9586e557d54ef70f9ca4b43c26892cd55257e1a5"},
        "plannerVersion"=>1,
        "namespace"=>"my_db.user_addresses",
        "indexFilterSet"=>false,
        "parsedQuery"=>
         {"_id"=>{"$eq"=>BSON::ObjectId('5ace54343b816c0cdf4b2aa9')}},
        "winningPlan"=>
         {"stage"=>"SHARDING_FILTER", "inputStage"=>{"stage"=>"IDHACK"}},
        "rejectedPlans"=>[]},
       {"shardName"=>"ShardThree",
        "connectionString"=>
         "ShardThree/ip-xxx-xx-9-143:88888,ip-xxx-xx-0-87:88887",
        "serverInfo"=>
         {"host"=>"ip-xxx-xx-0-87",
          "port"=>88887,
          "version"=>"3.6.3",
          "gitVersion"=>"9586e557d54ef70f9ca4b43c26892cd55257e1a5"},
        "plannerVersion"=>1,
        "namespace"=>"my_db.user_addresses",
        "indexFilterSet"=>false,
        "parsedQuery"=>
         {"_id"=>{"$eq"=>BSON::ObjectId('5ace54343b816c0cdf4b2aa9')}},
        "winningPlan"=>
         {"stage"=>"SHARDING_FILTER", "inputStage"=>{"stage"=>"IDHACK"}},
        "rejectedPlans"=>[]}]}},
 "executionStats"=>
  {"nReturned"=>1,
   "executionTimeMillis"=>1,
   "totalKeysExamined"=>1,
   "totalDocsExamined"=>1,
   "executionStages"=>
    {"stage"=>"SHARD_MERGE",
     "nReturned"=>1,
     "executionTimeMillis"=>1,
     "totalKeysExamined"=>1,
     "totalDocsExamined"=>1,
     "totalChildMillis"=>0,
     "shards"=>
      [{"shardName"=>"ShardOne",
        "executionSuccess"=>true,
        "executionStages"=>
         {"stage"=>"SHARDING_FILTER",
          "nReturned"=>0,
          "executionTimeMillisEstimate"=>0,
          "works"=>1,
          "advanced"=>0,
          "needTime"=>0,
          "needYield"=>0,
          "saveState"=>0,
          "restoreState"=>0,
          "isEOF"=>1,
          "invalidates"=>0,
          "chunkSkips"=>0,
          "inputStage"=>
           {"stage"=>"IDHACK",
            "nReturned"=>0,
            "executionTimeMillisEstimate"=>0,
            "works"=>1,
            "advanced"=>0,
            "needTime"=>0,
            "needYield"=>0,
            "saveState"=>0,
            "restoreState"=>0,
            "isEOF"=>1,
            "invalidates"=>0,
            "keysExamined"=>0,
            "docsExamined"=>0}}},
       {"shardName"=>"ShardTwo",
        "executionSuccess"=>true,
        "executionStages"=>
         {"stage"=>"SHARDING_FILTER",
          "nReturned"=>0,
          "executionTimeMillisEstimate"=>0,
          "works"=>1,
          "advanced"=>0,
          "needTime"=>0,
          "needYield"=>0,
          "saveState"=>0,
          "restoreState"=>0,
          "isEOF"=>1,
          "invalidates"=>0,
          "chunkSkips"=>0,
          "inputStage"=>
           {"stage"=>"IDHACK",
            "nReturned"=>0,
            "executionTimeMillisEstimate"=>0,
            "works"=>1,
            "advanced"=>0,
            "needTime"=>0,
            "needYield"=>0,
            "saveState"=>0,
            "restoreState"=>0,
            "isEOF"=>1,
            "invalidates"=>0,
            "keysExamined"=>0,
            "docsExamined"=>0}}},
       {"shardName"=>"ShardThree",
        "executionSuccess"=>true,
        "executionStages"=>
         {"stage"=>"SHARDING_FILTER",
          "nReturned"=>1,
          "executionTimeMillisEstimate"=>0,
          "works"=>2,
          "advanced"=>1,
          "needTime"=>0,
          "needYield"=>0,
          "saveState"=>0,
          "restoreState"=>0,
          "isEOF"=>1,
          "invalidates"=>0,
          "chunkSkips"=>0,
          "inputStage"=>
           {"stage"=>"IDHACK",
            "nReturned"=>1,
            "executionTimeMillisEstimate"=>0,
            "works"=>1,
            "advanced"=>1,
            "needTime"=>0,
            "needYield"=>0,
            "saveState"=>0,
            "restoreState"=>0,
            "isEOF"=>1,
            "invalidates"=>0,
            "keysExamined"=>1,
            "docsExamined"=>1}}}]},
   "allPlansExecution"=>
    [{"shardName"=>"ShardOne", "allPlans"=>[]},
     {"shardName"=>"ShardTwo", "allPlans"=>[]},
     {"shardName"=>"ShardThree", "allPlans"=>[]}]},
 "ok"=>1.0,
 "$clusterTime"=>
  {"clusterTime"=>
    #<BSON::Timestamp:0x31dbca5d @increment=475, @seconds=1523618199>,
   "signature"=>
    {"hash"=><BSON::Binary:0x2786 type=generic data=0x57ecb8e45eee5178...>,
     "keyId"=>6537488309583609875}},
 "operationTime"=>
  #<BSON::Timestamp:0x21ebf9be @increment=474, @seconds=1523618199>}

问题是 New Relic 指出这是所有数据库查询中最耗时的查询。

Avg response time: 50,100 ms
Min: 1.37ms
Max: 62400 ms
Throughput: 104 cpm

我们如何优化查找查询?

Min: 1.37ms, Max: 62400 ms 表明问题不在于查询本身,而在于集群,例如其中某个分片挂起了一分钟。 "stage"=>"IDHACK" 基本上说明查询本身已经很难再进一步优化了。

我不明白你为什么一开始就要对它进行分片。 500 万个文档 x 每个文档 50 字节,总共不到半 GB。您可以轻松地把这些数据放进单个分片的内存中,并使用覆盖索引使查询速度非常快。