使用索引在 n1ql 和 couchbase 中提高性能
Improving performance in N1QL (Couchbase) using an index
我有以下查询
/*
 * EXPLAIN of a revenue roll-up over DailyCampaignUsage documents in the
 * Inheritx keyspace.
 *
 * The inner subquery has no GROUP BY, so it aggregates all joined rows that
 * pass the WHERE filter into three values, each rounded to 2 decimals:
 *   total_revenue       - SUM of ARRAY_SUM(statistics[*].clicksCost)
 *   period_revenue      - same sum, only for day between '2016-05-01' and '2016-05-23'
 *   period_prev_revenue - same sum, only for day between '2016-04-01' and '2016-04-23'
 *
 * Campaign and Users are joined by document key:
 *   'Campaign|' || TOSTRING(DailyCampaignUsage.campaignId)
 *   'User|'     || TOSTRING(Campaign.user)
 *
 * NOTE(review): CASE WHEN FALSE ... ELSE TRUE END always takes the ELSE branch,
 * so this predicate filters nothing.
 * NOTE(review): the outer ORDER BY uses `created`, which the inner SELECT does
 * not project.
 * NOTE(review): the sample document stores day as a full timestamp string
 * ("2015-11-19T00:00:00Z"); compared as strings against date-only bounds, the
 * upper-bound day itself presumably does not match - confirm.
 */
explain SELECT * FROM (select ROUND(sum(ARRAY_SUM(DailyCampaignUsage.`statistics`[*].clicksCost)),2) total_revenue,
ROUND(sum(CASE WHEN DailyCampaignUsage.day between '2016-05-01' and '2016-05-23' THEN ARRAY_SUM(DailyCampaignUsage.`statistics`[*].clicksCost) ELSE 0 END),2) period_revenue,
ROUND(sum(CASe WHEN DailyCampaignUsage.day between '2016-04-01' and '2016-04-23' THEN ARRAY_SUM(DailyCampaignUsage.`statistics`[*].clicksCost) ELSE 0 END),2) period_prev_revenue
from Inheritx DailyCampaignUsage use index(dailyCampaignUsage_type_day_clicksCost)
JOIN Inheritx Campaign ON KEYS ('Campaign|'||TOSTRING(DailyCampaignUsage.campaignId))
JOIN Inheritx Users on keys('User|'|| TOSTRING(Campaign.`user`))
WHERE DailyCampaignUsage._type='DailyCampaignUsage' and CASE WHEN FALSE THEN Users.`user` in FALSE ELSE TRUE END ) AS __viewdef__ ORDER BY `created` DESC
我的索引如下:
/* Index restricted to documents whose _type is 'DailyCampaignUsage'.
   Keys: _type, day, and the clicksCost values of the statistics array. */
CREATE INDEX dailyCampaignUsage_type_day_clicksCost
ON Inheritx (_type, day, `statistics`[*].clicksCost)
WHERE _type = 'DailyCampaignUsage'
我在查询中使用了它。
我的 EXPLAIN 执行计划如下:
{
"plan": {
"#operator": "Sequence",
"~children": [
{
"#operator": "Sequence",
"~children": [
{
"#operator": "Sequence",
"~children": [
{
"#operator": "IndexScan",
"index": "dailyCampaignUsage_type_day_clicksCost",
"index_id": "37387d27d560354b",
"keyspace": "Inheritx",
"namespace": "default",
"spans": [
{
"Range": {
"High": [
"successor(\"DailyCampaignUsage\")"
],
"Inclusion": 1,
"Low": [
"\"DailyCampaignUsage\""
]
}
}
],
"using": "gsi"
},
{
"#operator": "Parallel",
"~child": {
"#operator": "Sequence",
"~children": [
{
"#operator": "Fetch",
"as": "DailyCampaignUsage",
"keyspace": "Inheritx",
"namespace": "default"
},
{
"#operator": "Join",
"as": "Campaign",
"keyspace": "Inheritx",
"namespace": "default",
"on_keys": "(\"Campaign|\" || to_string((`DailyCampaignUsage`.`campaignId`)))"
},
{
"#operator": "Join",
"as": "Users",
"keyspace": "Inheritx",
"namespace": "default",
"on_keys": "(\"User|\" || to_string((`Campaign`.`user`)))"
},
{
"#operator": "Filter",
"condition": "(((`DailyCampaignUsage`.`_type`) = \"DailyCampaignUsage\") and case when false then ((`Users`.`user`) in false) else true end)"
},
{
"#operator": "InitialGroup",
"aggregates": [
"sum(array_sum((array_star((`DailyCampaignUsage`.`statistics`)).`clicksCost`)))",
"sum(case when ((`DailyCampaignUsage`.`day`) between \"2016-04-01\" and \"2016-04-23\") then array_sum((array_star((`DailyCampaignUsage`.`statistics`)).`clicksCost`)) else 0 end)",
"sum(case when ((`DailyCampaignUsage`.`day`) between \"2016-05-01\" and \"2016-05-23\") then array_sum((array_star((`DailyCampaignUsage`.`statistics`)).`clicksCost`)) else 0 end)"
],
"group_keys": []
}
]
}
},
{
"#operator": "IntermediateGroup",
"aggregates": [
"sum(array_sum((array_star((`DailyCampaignUsage`.`statistics`)).`clicksCost`)))",
"sum(case when ((`DailyCampaignUsage`.`day`) between \"2016-04-01\" and \"2016-04-23\") then array_sum((array_star((`DailyCampaignUsage`.`statistics`)).`clicksCost`)) else 0 end)",
"sum(case when ((`DailyCampaignUsage`.`day`) between \"2016-05-01\" and \"2016-05-23\") then array_sum((array_star((`DailyCampaignUsage`.`statistics`)).`clicksCost`)) else 0 end)"
],
"group_keys": []
},
{
"#operator": "FinalGroup",
"aggregates": [
"sum(array_sum((array_star((`DailyCampaignUsage`.`statistics`)).`clicksCost`)))",
"sum(case when ((`DailyCampaignUsage`.`day`) between \"2016-04-01\" and \"2016-04-23\") then array_sum((array_star((`DailyCampaignUsage`.`statistics`)).`clicksCost`)) else 0 end)",
"sum(case when ((`DailyCampaignUsage`.`day`) between \"2016-05-01\" and \"2016-05-23\") then array_sum((array_star((`DailyCampaignUsage`.`statistics`)).`clicksCost`)) else 0 end)"
],
"group_keys": []
},
{
"#operator": "Parallel",
"~child": {
"#operator": "Sequence",
"~children": [
{
"#operator": "InitialProject",
"result_terms": [
{
"as": "total_revenue",
"expr": "round(sum(array_sum((array_star((`DailyCampaignUsage`.`statistics`)).`clicksCost`))), 2)"
},
{
"as": "period_revenue",
"expr": "round(sum(case when ((`DailyCampaignUsage`.`day`) between \"2016-05-01\" and \"2016-05-23\") then array_sum((array_star((`DailyCampaignUsage`.`statistics`)).`clicksCost`)) else 0 end), 2)"
},
{
"as": "period_prev_revenue",
"expr": "round(sum(case when ((`DailyCampaignUsage`.`day`) between \"2016-04-01\" and \"2016-04-23\") then array_sum((array_star((`DailyCampaignUsage`.`statistics`)).`clicksCost`)) else 0 end), 2)"
}
]
},
{
"#operator": "FinalProject"
}
]
}
}
]
},
{
"#operator": "Alias",
"as": "__viewdef__"
},
{
"#operator": "Parallel",
"~child": {
"#operator": "Sequence",
"~children": [
{
"#operator": "InitialProject",
"result_terms": [
{
"expr": "self",
"star": true
}
]
}
]
}
}
]
},
{
"#operator": "Order",
"sort_terms": [
{
"desc": true,
"expr": "(`__viewdef__`.`created`)"
}
]
},
{
"#operator": "FinalProject"
}
]
},
"text": "SELECT * FROM (select ROUND(sum(ARRAY_SUM(DailyCampaignUsage.`statistics`[*].clicksCost)),2) total_revenue,\nROUND(sum(CASE WHEN DailyCampaignUsage.day between '2016-05-01' and '2016-05-23' THEN ARRAY_SUM(DailyCampaignUsage.`statistics`[*].clicksCost) ELSE 0 END),2) period_revenue,\nROUND(sum(CASe WHEN DailyCampaignUsage.day between '2016-04-01' and '2016-04-23' THEN ARRAY_SUM(DailyCampaignUsage.`statistics`[*].clicksCost) ELSE 0 END),2) period_prev_revenue \nfrom Inheritx DailyCampaignUsage use index(dailyCampaignUsage_type_day_clicksCost)\nJOIN Inheritx Campaign ON KEYS ('Campaign|'||TOSTRING(DailyCampaignUsage.campaignId)) \nJOIN Inheritx Users on keys('User|'|| TOSTRING(Campaign.`user`)) \nWHERE DailyCampaignUsage._type='DailyCampaignUsage' and CASE WHEN FALSE THEN Users.`user` in FALSE ELSE TRUE END ) AS __viewdef__ ORDER BY `created` DESC"
}
即使使用了索引,我也无法缩短它的执行时间,目前耗时 13 秒。
我怎样才能把它降到 300 到 500 毫秒?
我的 JSON 文档如下所示,这样的文档共有 5 万多个:
DailyCampaignUsage|006657c0-c696-11e6-b6f2-7f0166ec7527{
"_id": "006657c0-c696-11e6-b6f2-7f0166ec7527",
"_type": "DailyCampaignUsage",
"campaignId": 249,
"day": "2015-11-19T00:00:00Z",
"statistics": [
{},
{},
{},
{},
{},
{},
{},
{},
{},
{},
{},
{},
{},
{},
{},
{},
{},
{},
{},
{},
{},
{},
{},
{
"clicks": 1741,
"clicksCost": 48.748
}
]
}
1) 可以先尝试优化内部查询:把第一个键空间在 WHERE 条件、投影和 JOIN ON KEYS 中用到的所有字段都放进索引定义,让查询使用覆盖索引。参见 https://developer.couchbase.com/documentation/server/4.5/indexes/covering-indexes.html 。该文档中的最后一个示例适用于你的情况。例如:
/* Suggested covering index: same name, keyspace and WHERE filter as the
   question's index, with campaignId (used by the first JOIN ... ON KEYS)
   added as an index key. */
CREATE INDEX dailyCampaignUsage_type_day_clicksCost
ON Inheritx (_type, day, campaignId, `statistics`[*].clicksCost)
WHERE _type = 'DailyCampaignUsage'
2) 您可以尝试内存优化索引 (MOI) 来极大地提高性能。这需要企业版。参见 https://developer.couchbase.com/documentation/server/4.5/architecture/global-secondary-indexes.html#story-h2-2
3) 外层查询只按 'created' 排序,但内层查询并没有投影(输出)这个字段。如果该字段属于第一个键空间,请把它也包含到索引中。
4) 另外,请检查 WHERE 条件中针对 Users.user 的 CASE 表达式:它的结果恒为 TRUE。不确定你是否真的需要第三个连接(JOIN Users)。
希望对你有帮助,
-普拉萨德
我有以下查询
/*
 * EXPLAIN of a revenue roll-up over DailyCampaignUsage documents in the
 * Inheritx keyspace.
 *
 * The inner subquery has no GROUP BY, so it aggregates all joined rows that
 * pass the WHERE filter into three values, each rounded to 2 decimals:
 *   total_revenue       - SUM of ARRAY_SUM(statistics[*].clicksCost)
 *   period_revenue      - same sum, only for day between '2016-05-01' and '2016-05-23'
 *   period_prev_revenue - same sum, only for day between '2016-04-01' and '2016-04-23'
 *
 * Campaign and Users are joined by document key:
 *   'Campaign|' || TOSTRING(DailyCampaignUsage.campaignId)
 *   'User|'     || TOSTRING(Campaign.user)
 *
 * NOTE(review): CASE WHEN FALSE ... ELSE TRUE END always takes the ELSE branch,
 * so this predicate filters nothing.
 * NOTE(review): the outer ORDER BY uses `created`, which the inner SELECT does
 * not project.
 * NOTE(review): the sample document stores day as a full timestamp string
 * ("2015-11-19T00:00:00Z"); compared as strings against date-only bounds, the
 * upper-bound day itself presumably does not match - confirm.
 */
explain SELECT * FROM (select ROUND(sum(ARRAY_SUM(DailyCampaignUsage.`statistics`[*].clicksCost)),2) total_revenue,
ROUND(sum(CASE WHEN DailyCampaignUsage.day between '2016-05-01' and '2016-05-23' THEN ARRAY_SUM(DailyCampaignUsage.`statistics`[*].clicksCost) ELSE 0 END),2) period_revenue,
ROUND(sum(CASe WHEN DailyCampaignUsage.day between '2016-04-01' and '2016-04-23' THEN ARRAY_SUM(DailyCampaignUsage.`statistics`[*].clicksCost) ELSE 0 END),2) period_prev_revenue
from Inheritx DailyCampaignUsage use index(dailyCampaignUsage_type_day_clicksCost)
JOIN Inheritx Campaign ON KEYS ('Campaign|'||TOSTRING(DailyCampaignUsage.campaignId))
JOIN Inheritx Users on keys('User|'|| TOSTRING(Campaign.`user`))
WHERE DailyCampaignUsage._type='DailyCampaignUsage' and CASE WHEN FALSE THEN Users.`user` in FALSE ELSE TRUE END ) AS __viewdef__ ORDER BY `created` DESC
我的索引如下:
/* Index restricted to documents whose _type is 'DailyCampaignUsage'.
   Keys: _type, day, and the clicksCost values of the statistics array. */
CREATE INDEX dailyCampaignUsage_type_day_clicksCost
ON Inheritx (_type, day, `statistics`[*].clicksCost)
WHERE _type = 'DailyCampaignUsage'
我在查询中使用了它。
我的 EXPLAIN 执行计划如下:
{
"plan": {
"#operator": "Sequence",
"~children": [
{
"#operator": "Sequence",
"~children": [
{
"#operator": "Sequence",
"~children": [
{
"#operator": "IndexScan",
"index": "dailyCampaignUsage_type_day_clicksCost",
"index_id": "37387d27d560354b",
"keyspace": "Inheritx",
"namespace": "default",
"spans": [
{
"Range": {
"High": [
"successor(\"DailyCampaignUsage\")"
],
"Inclusion": 1,
"Low": [
"\"DailyCampaignUsage\""
]
}
}
],
"using": "gsi"
},
{
"#operator": "Parallel",
"~child": {
"#operator": "Sequence",
"~children": [
{
"#operator": "Fetch",
"as": "DailyCampaignUsage",
"keyspace": "Inheritx",
"namespace": "default"
},
{
"#operator": "Join",
"as": "Campaign",
"keyspace": "Inheritx",
"namespace": "default",
"on_keys": "(\"Campaign|\" || to_string((`DailyCampaignUsage`.`campaignId`)))"
},
{
"#operator": "Join",
"as": "Users",
"keyspace": "Inheritx",
"namespace": "default",
"on_keys": "(\"User|\" || to_string((`Campaign`.`user`)))"
},
{
"#operator": "Filter",
"condition": "(((`DailyCampaignUsage`.`_type`) = \"DailyCampaignUsage\") and case when false then ((`Users`.`user`) in false) else true end)"
},
{
"#operator": "InitialGroup",
"aggregates": [
"sum(array_sum((array_star((`DailyCampaignUsage`.`statistics`)).`clicksCost`)))",
"sum(case when ((`DailyCampaignUsage`.`day`) between \"2016-04-01\" and \"2016-04-23\") then array_sum((array_star((`DailyCampaignUsage`.`statistics`)).`clicksCost`)) else 0 end)",
"sum(case when ((`DailyCampaignUsage`.`day`) between \"2016-05-01\" and \"2016-05-23\") then array_sum((array_star((`DailyCampaignUsage`.`statistics`)).`clicksCost`)) else 0 end)"
],
"group_keys": []
}
]
}
},
{
"#operator": "IntermediateGroup",
"aggregates": [
"sum(array_sum((array_star((`DailyCampaignUsage`.`statistics`)).`clicksCost`)))",
"sum(case when ((`DailyCampaignUsage`.`day`) between \"2016-04-01\" and \"2016-04-23\") then array_sum((array_star((`DailyCampaignUsage`.`statistics`)).`clicksCost`)) else 0 end)",
"sum(case when ((`DailyCampaignUsage`.`day`) between \"2016-05-01\" and \"2016-05-23\") then array_sum((array_star((`DailyCampaignUsage`.`statistics`)).`clicksCost`)) else 0 end)"
],
"group_keys": []
},
{
"#operator": "FinalGroup",
"aggregates": [
"sum(array_sum((array_star((`DailyCampaignUsage`.`statistics`)).`clicksCost`)))",
"sum(case when ((`DailyCampaignUsage`.`day`) between \"2016-04-01\" and \"2016-04-23\") then array_sum((array_star((`DailyCampaignUsage`.`statistics`)).`clicksCost`)) else 0 end)",
"sum(case when ((`DailyCampaignUsage`.`day`) between \"2016-05-01\" and \"2016-05-23\") then array_sum((array_star((`DailyCampaignUsage`.`statistics`)).`clicksCost`)) else 0 end)"
],
"group_keys": []
},
{
"#operator": "Parallel",
"~child": {
"#operator": "Sequence",
"~children": [
{
"#operator": "InitialProject",
"result_terms": [
{
"as": "total_revenue",
"expr": "round(sum(array_sum((array_star((`DailyCampaignUsage`.`statistics`)).`clicksCost`))), 2)"
},
{
"as": "period_revenue",
"expr": "round(sum(case when ((`DailyCampaignUsage`.`day`) between \"2016-05-01\" and \"2016-05-23\") then array_sum((array_star((`DailyCampaignUsage`.`statistics`)).`clicksCost`)) else 0 end), 2)"
},
{
"as": "period_prev_revenue",
"expr": "round(sum(case when ((`DailyCampaignUsage`.`day`) between \"2016-04-01\" and \"2016-04-23\") then array_sum((array_star((`DailyCampaignUsage`.`statistics`)).`clicksCost`)) else 0 end), 2)"
}
]
},
{
"#operator": "FinalProject"
}
]
}
}
]
},
{
"#operator": "Alias",
"as": "__viewdef__"
},
{
"#operator": "Parallel",
"~child": {
"#operator": "Sequence",
"~children": [
{
"#operator": "InitialProject",
"result_terms": [
{
"expr": "self",
"star": true
}
]
}
]
}
}
]
},
{
"#operator": "Order",
"sort_terms": [
{
"desc": true,
"expr": "(`__viewdef__`.`created`)"
}
]
},
{
"#operator": "FinalProject"
}
]
},
"text": "SELECT * FROM (select ROUND(sum(ARRAY_SUM(DailyCampaignUsage.`statistics`[*].clicksCost)),2) total_revenue,\nROUND(sum(CASE WHEN DailyCampaignUsage.day between '2016-05-01' and '2016-05-23' THEN ARRAY_SUM(DailyCampaignUsage.`statistics`[*].clicksCost) ELSE 0 END),2) period_revenue,\nROUND(sum(CASe WHEN DailyCampaignUsage.day between '2016-04-01' and '2016-04-23' THEN ARRAY_SUM(DailyCampaignUsage.`statistics`[*].clicksCost) ELSE 0 END),2) period_prev_revenue \nfrom Inheritx DailyCampaignUsage use index(dailyCampaignUsage_type_day_clicksCost)\nJOIN Inheritx Campaign ON KEYS ('Campaign|'||TOSTRING(DailyCampaignUsage.campaignId)) \nJOIN Inheritx Users on keys('User|'|| TOSTRING(Campaign.`user`)) \nWHERE DailyCampaignUsage._type='DailyCampaignUsage' and CASE WHEN FALSE THEN Users.`user` in FALSE ELSE TRUE END ) AS __viewdef__ ORDER BY `created` DESC"
}
即使使用了索引,我也无法缩短它的执行时间,目前耗时 13 秒。我怎样才能把它降到 300 到 500 毫秒?我的 JSON 文档如下所示,这样的文档共有 5 万多个:
DailyCampaignUsage|006657c0-c696-11e6-b6f2-7f0166ec7527{
"_id": "006657c0-c696-11e6-b6f2-7f0166ec7527",
"_type": "DailyCampaignUsage",
"campaignId": 249,
"day": "2015-11-19T00:00:00Z",
"statistics": [
{},
{},
{},
{},
{},
{},
{},
{},
{},
{},
{},
{},
{},
{},
{},
{},
{},
{},
{},
{},
{},
{},
{},
{
"clicks": 1741,
"clicksCost": 48.748
}
]
}
1) 可以先尝试优化内部查询:把第一个键空间在 WHERE 条件、投影和 JOIN ON KEYS 中用到的所有字段都放进索引定义,让查询使用覆盖索引。参见 https://developer.couchbase.com/documentation/server/4.5/indexes/covering-indexes.html 。该文档中的最后一个示例适用于你的情况。例如:
/* Suggested covering index: same name, keyspace and WHERE filter as the
   question's index, with campaignId (used by the first JOIN ... ON KEYS)
   added as an index key. */
CREATE INDEX dailyCampaignUsage_type_day_clicksCost
ON Inheritx (_type, day, campaignId, `statistics`[*].clicksCost)
WHERE _type = 'DailyCampaignUsage'
2) 您可以尝试内存优化索引 (MOI) 来极大地提高性能。这需要企业版。参见 https://developer.couchbase.com/documentation/server/4.5/architecture/global-secondary-indexes.html#story-h2-2
3) 外层查询只按 'created' 排序,但内层查询并没有投影(输出)这个字段。如果该字段属于第一个键空间,请把它也包含到索引中。
4) 另外,请检查 WHERE 条件中针对 Users.user 的 CASE 表达式:它的结果恒为 TRUE。不确定你是否真的需要第三个连接(JOIN Users)。
希望对你有帮助, -普拉萨德