如何改进包含存储过程使用的多个自连接的视图
How to improve a view containing multiple self joins used by a stored procedure
我有一个非常慢的存储过程(需要 5-6 分钟才能返回结果),其中用到了若干张表和一个视图。我认为是视图中的多个自连接拖慢了存储过程。其中表 A 有 700,000 行,表 B 有 20 行。
Table A
id | status_key | status_date | seq
10035 2 2020-10-01 1
10035 3 2020-10-03 2
10049 2 2020-06-10 1
10049 3 2020-06-13 2
10049 4 2020-06-17 3
10049 5 2020-07-03 4
Table B
status_key | status_name
2 | accepted
3 | conditionally accepted
4 | decided
5 | declined
视图
-- Status-history view over A. For each row of A that has a matching status in
-- B it exposes the current status plus the statuses found at seq - 1
-- (previous), seq + 1 (next) and seq + 2 (next_2) for the same id.
-- Neighbour rows are LEFT JOINed, so their columns are NULL when no such row
-- exists. Assumes (id, seq) identifies a single row of A -- TODO confirm;
-- otherwise the self-joins multiply rows.
SELECT a1.id,
       a1.status_key  AS current_status_key,
       b1.status_name AS current_status_name,
       a1.status_date AS current_status_date,
       a2.status_key  AS previous_status_key,
       b2.status_name AS previous_status_name,  -- alias was misspelled "precious_status_name"
       a2.status_date AS previous_status_date,
       a3.status_key  AS next_status_key,
       b3.status_name AS next_status_name,
       a3.status_date AS next_status_date,
       a4.status_key  AS next_2_status_key,
       b4.status_name AS next_2_status_name,
       a4.status_date AS next_2_status_date     -- last column: no trailing comma before FROM
FROM A a1
INNER JOIN B b1 ON b1.status_key = a1.status_key
-- The +/- arithmetic is applied to a1.seq so that the joined table's seq
-- column stays bare; this lets an index on (id, seq) be used to look up the
-- neighbouring row. The predicates are equivalent to the original
-- "a1.seq = aN.seq +/- k" forms.
LEFT JOIN A a2 ON a2.id = a1.id AND a2.seq = a1.seq - 1
LEFT JOIN B b2 ON b2.status_key = a2.status_key
LEFT JOIN A a3 ON a3.id = a1.id AND a3.seq = a1.seq + 1
LEFT JOIN B b3 ON b3.status_key = a3.status_key
LEFT JOIN A a4 ON a4.id = a1.id AND a4.seq = a1.seq + 2
LEFT JOIN B b4 ON b4.status_key = a4.status_key
想要的结果
id | current_status_key | current_status_name | current_status_date | previous_status_key | previous_status_name | previous_status_date | next_status_key | next_status_name | next_status_date | next_2_status_key | next_2_status_name | next_2_status_date
10035 | 2 | accepted | 2020-10-01 | NULL | NULL | NULL | 3 | conditionally accepted | 2020-10-03 | NULL | NULL | NULL
10035 | 3 | conditionally accepted | 2020-10-03 | 2 | accepted | 2020-10-01 | NULL | NULL | NULL | NULL | NULL | NULL
如何通过重写这部分来改进我的视图?我正在考虑使用 CTE 把上面的部分拆分出来。有什么想法吗?
您可以使用 LAG 和 LEAD 来计算相关的状态键,而不是在 A 上做自连接(self-join)。这意味着只需要从 A 读取一次数据行——不过实际效果仍需要在您自己的数据库和环境中测试。
下面是上述做法的 SQL 示例。注意:在问题补充了示例数据之后,此 SQL 已相应更新。
-- Reads A once: LAG/LEAD fetch the neighbouring rows' status values (ordered
-- by seq within each id) instead of self-joining A for each neighbour.
-- NOTE(review): LAG/LEAD return the adjacent row in seq order, which equals
-- "seq - 1 / seq + 1 / seq + 2" only if seq has no gaps within an id -- confirm.
WITH status_window AS
(
    SELECT
        A.ID,
        A.status_key  AS cur_status_key,
        A.status_date AS cur_status_date,
        -- Offset defaults to 1 when omitted.
        LAG(A.status_key)      OVER (PARTITION BY A.id ORDER BY A.seq) AS prev_status_key,
        LAG(A.status_date)     OVER (PARTITION BY A.id ORDER BY A.seq) AS prev_status_date,
        LEAD(A.status_key)     OVER (PARTITION BY A.id ORDER BY A.seq) AS next_status_key,
        LEAD(A.status_date)    OVER (PARTITION BY A.id ORDER BY A.seq) AS next_status_date,
        LEAD(A.status_key, 2)  OVER (PARTITION BY A.id ORDER BY A.seq) AS next_2_status_key,
        LEAD(A.status_date, 2) OVER (PARTITION BY A.id ORDER BY A.seq) AS next_2_status_date
    FROM A
)
SELECT
    sw.id,
    sw.cur_status_key     AS current_status_key,
    cur_b.status_name     AS current_status_name,
    sw.cur_status_date    AS current_status_date,
    sw.prev_status_key    AS previous_status_key,
    prev_b.status_name    AS previous_status_name,
    sw.prev_status_date   AS previous_status_date,
    sw.next_status_key    AS next_status_key,
    next_b.status_name    AS next_status_name,
    sw.next_status_date   AS next_status_date,
    sw.next_2_status_key  AS next_2_status_key,
    next_2_b.status_name  AS next_2_status_name,
    sw.next_2_status_date AS next_2_status_date
FROM status_window sw
-- B is looked up once per status key to translate it into its name.
LEFT JOIN B cur_b    ON cur_b.status_key    = sw.cur_status_key
LEFT JOIN B prev_b   ON prev_b.status_key   = sw.prev_status_key
LEFT JOIN B next_b   ON next_b.status_key   = sw.next_status_key
LEFT JOIN B next_2_b ON next_2_b.status_key = sw.next_2_status_key;
这里是一个使用临时表(temp tables)的 db<>fiddle 示例。
如果在 id, seq 上建有聚集索引,我认为也会有很大帮助。如果表 A 实际上更大并且还有其他列,那么在这两列上建立非聚集(non-clustered)索引并包含其他相关列可能会更好,例如 id, seq, status_date, status_key。
旧版本
-- Earlier sketch of the LAG/LEAD rewrite, labelled as the old version.
-- "..." marks column lists the author elided, so this is not runnable as-is.
-- Here LEAD feeds a2_status_key and LAG feeds a3_/a4_status_key, which is the
-- reverse of the view's mapping (a2 = seq - 1, a3 = seq + 1, a4 = seq + 2).
WITH a1 AS
(SELECT ...,
LEAD(A.status_key, 1) OVER (PARTITION BY A.id ORDER BY A.seq) AS a2_status_key,
LAG(A.status_key, 1) OVER (PARTITION BY A.id ORDER BY A.seq) AS a3_status_key,
LAG(A.status_key, 2) OVER (PARTITION BY A.id ORDER BY A.seq) AS a4_status_key
FROM A
)
SELECT a1.*, ...
FROM a1
-- One lookup of B per status key to resolve its status_name.
LEFT JOIN B b1 ON a1.status_key = b1.status_key
LEFT JOIN B b2 ON a1.a2_status_key = b2.status_key
LEFT JOIN B b3 ON a1.a3_status_key = b3.status_key
LEFT JOIN B b4 ON a1.a4_status_key = b4.status_key;
我有一个非常慢的存储过程(需要 5-6 分钟才能返回结果),其中用到了若干张表和一个视图。我认为是视图中的多个自连接拖慢了存储过程。其中表 A 有 700,000 行,表 B 有 20 行。
Table A
id | status_key | status_date | seq
10035 2 2020-10-01 1
10035 3 2020-10-03 2
10049 2 2020-06-10 1
10049 3 2020-06-13 2
10049 4 2020-06-17 3
10049 5 2020-07-03 4
Table B
status_key | status_name
2 | accepted
3 | conditionally accepted
4 | decided
5 | declined
视图
-- Status-history view over A. For each row of A that has a matching status in
-- B it exposes the current status plus the statuses found at seq - 1
-- (previous), seq + 1 (next) and seq + 2 (next_2) for the same id.
-- Neighbour rows are LEFT JOINed, so their columns are NULL when no such row
-- exists. Assumes (id, seq) identifies a single row of A -- TODO confirm;
-- otherwise the self-joins multiply rows.
SELECT a1.id,
       a1.status_key  AS current_status_key,
       b1.status_name AS current_status_name,
       a1.status_date AS current_status_date,
       a2.status_key  AS previous_status_key,
       b2.status_name AS previous_status_name,  -- alias was misspelled "precious_status_name"
       a2.status_date AS previous_status_date,
       a3.status_key  AS next_status_key,
       b3.status_name AS next_status_name,
       a3.status_date AS next_status_date,
       a4.status_key  AS next_2_status_key,
       b4.status_name AS next_2_status_name,
       a4.status_date AS next_2_status_date     -- last column: no trailing comma before FROM
FROM A a1
INNER JOIN B b1 ON b1.status_key = a1.status_key
-- The +/- arithmetic is applied to a1.seq so that the joined table's seq
-- column stays bare; this lets an index on (id, seq) be used to look up the
-- neighbouring row. The predicates are equivalent to the original
-- "a1.seq = aN.seq +/- k" forms.
LEFT JOIN A a2 ON a2.id = a1.id AND a2.seq = a1.seq - 1
LEFT JOIN B b2 ON b2.status_key = a2.status_key
LEFT JOIN A a3 ON a3.id = a1.id AND a3.seq = a1.seq + 1
LEFT JOIN B b3 ON b3.status_key = a3.status_key
LEFT JOIN A a4 ON a4.id = a1.id AND a4.seq = a1.seq + 2
LEFT JOIN B b4 ON b4.status_key = a4.status_key
想要的结果
id | current_status_key | current_status_name | current_status_date | previous_status_key | previous_status_name | previous_status_date | next_status_key | next_status_name | next_status_date | next_2_status_key | next_2_status_name | next_2_status_date
10035 | 2 | accepted | 2020-10-01 | NULL | NULL | NULL | 3 | conditionally accepted | 2020-10-03 | NULL | NULL | NULL
10035 | 3 | conditionally accepted | 2020-10-03 | 2 | accepted | 2020-10-01 | NULL | NULL | NULL | NULL | NULL | NULL
如何通过重写这部分来改进我的视图?我正在考虑使用 CTE 把上面的部分拆分出来。有什么想法吗?
您可以使用 LAG 和 LEAD 来计算相关的状态键,而不是在 A 上做自连接(self-join)。这意味着只需要从 A 读取一次数据行——不过实际效果仍需要在您自己的数据库和环境中测试。
下面是上述做法的 SQL 示例。注意:在问题补充了示例数据之后,此 SQL 已相应更新。
-- Reads A once: LAG/LEAD fetch the neighbouring rows' status values (ordered
-- by seq within each id) instead of self-joining A for each neighbour.
-- NOTE(review): LAG/LEAD return the adjacent row in seq order, which equals
-- "seq - 1 / seq + 1 / seq + 2" only if seq has no gaps within an id -- confirm.
WITH status_window AS
(
    SELECT
        A.ID,
        A.status_key  AS cur_status_key,
        A.status_date AS cur_status_date,
        -- Offset defaults to 1 when omitted.
        LAG(A.status_key)      OVER (PARTITION BY A.id ORDER BY A.seq) AS prev_status_key,
        LAG(A.status_date)     OVER (PARTITION BY A.id ORDER BY A.seq) AS prev_status_date,
        LEAD(A.status_key)     OVER (PARTITION BY A.id ORDER BY A.seq) AS next_status_key,
        LEAD(A.status_date)    OVER (PARTITION BY A.id ORDER BY A.seq) AS next_status_date,
        LEAD(A.status_key, 2)  OVER (PARTITION BY A.id ORDER BY A.seq) AS next_2_status_key,
        LEAD(A.status_date, 2) OVER (PARTITION BY A.id ORDER BY A.seq) AS next_2_status_date
    FROM A
)
SELECT
    sw.id,
    sw.cur_status_key     AS current_status_key,
    cur_b.status_name     AS current_status_name,
    sw.cur_status_date    AS current_status_date,
    sw.prev_status_key    AS previous_status_key,
    prev_b.status_name    AS previous_status_name,
    sw.prev_status_date   AS previous_status_date,
    sw.next_status_key    AS next_status_key,
    next_b.status_name    AS next_status_name,
    sw.next_status_date   AS next_status_date,
    sw.next_2_status_key  AS next_2_status_key,
    next_2_b.status_name  AS next_2_status_name,
    sw.next_2_status_date AS next_2_status_date
FROM status_window sw
-- B is looked up once per status key to translate it into its name.
LEFT JOIN B cur_b    ON cur_b.status_key    = sw.cur_status_key
LEFT JOIN B prev_b   ON prev_b.status_key   = sw.prev_status_key
LEFT JOIN B next_b   ON next_b.status_key   = sw.next_status_key
LEFT JOIN B next_2_b ON next_2_b.status_key = sw.next_2_status_key;
这里是一个使用临时表(temp tables)的 db<>fiddle 示例。
如果在 id, seq 上建有聚集索引,我认为也会有很大帮助。如果表 A 实际上更大并且还有其他列,那么在这两列上建立非聚集(non-clustered)索引并包含其他相关列可能会更好,例如 id, seq, status_date, status_key。
旧版本
-- Earlier sketch of the LAG/LEAD rewrite, labelled as the old version.
-- "..." marks column lists the author elided, so this is not runnable as-is.
-- Here LEAD feeds a2_status_key and LAG feeds a3_/a4_status_key, which is the
-- reverse of the view's mapping (a2 = seq - 1, a3 = seq + 1, a4 = seq + 2).
WITH a1 AS
(SELECT ...,
LEAD(A.status_key, 1) OVER (PARTITION BY A.id ORDER BY A.seq) AS a2_status_key,
LAG(A.status_key, 1) OVER (PARTITION BY A.id ORDER BY A.seq) AS a3_status_key,
LAG(A.status_key, 2) OVER (PARTITION BY A.id ORDER BY A.seq) AS a4_status_key
FROM A
)
SELECT a1.*, ...
FROM a1
-- One lookup of B per status key to resolve its status_name.
LEFT JOIN B b1 ON a1.status_key = b1.status_key
LEFT JOIN B b2 ON a1.a2_status_key = b2.status_key
LEFT JOIN B b3 ON a1.a3_status_key = b3.status_key
LEFT JOIN B b4 ON a1.a4_status_key = b4.status_key;