如何有效地更新依赖于以前记录的 table
How to efficiently update a table that depends on previous records
我的 SQL Server 表如下所示:
id Distance a b Grp
--------------------------------
1 0.0000000000 100 114 NULL
2 0.1000000000 64 125 NULL
3 0.1000000000 88 100 NULL
4 0.1000000000 65 125 NULL
5 0.1000000000 63 64 NULL
6 0.1000000000 65 66 NULL
7 0.2000000000 63 66 NULL
8 0.2000000000 10 61 NULL
9 0.2000000000 19 61 NULL
10 0.2000000000 30 61 NULL
11 0.2000000000 10 65 NULL
12 0.2000000000 10 94 NULL
13 0.2000000000 19 65 NULL
14 0.2000000000 19 94 NULL
15 0.2000000000 30 94 NULL
16 0.2000000000 60 94 NULL
17 0.2000000000 61 94 NULL
Grp
栏应填写如下
第一条记录Grp
是1
如果当前行的 a 或 b 的值出现在前面的任何一行中,那么它将采用其中最早一行的 Grp
值
如果当前行的 a 和 b 的值都没有出现在前面的任何行中,则 Grp
值将是 max Grp + 1
如果记录 id = 3 那么 b = 100 的值存在于前面的行中,它出现的第一个是 id = 1,即 Grp = 1
所以 Grp
对于 id 3
将是 1
我的 table 应该是这样的:
id Distance a b Grp
--------------------------------
1 0.0000000000 100 114 1
2 0.1000000000 64 125 2
3 0.1000000000 88 100 1
4 0.1000000000 65 125 2
5 0.1000000000 63 64 2
6 0.1000000000 65 66 2
7 0.2000000000 63 66 2
8 0.2000000000 10 61 3
9 0.2000000000 19 61 3
10 0.2000000000 30 61 3
11 0.2000000000 10 65 2
12 0.2000000000 10 94 3
13 0.2000000000 19 65 2
14 0.2000000000 19 94 3
15 0.2000000000 30 94 3
16 0.2000000000 60 94 3
17 0.2000000000 61 94 3
我已经构建了这个运行良好的脚本,但是它非常慢,我有什么办法可以让它变得更好(没有循环)?
-- Assigns a group number (Grp) to every row of MyTable, processed in Distance order.
--   * Row 1 always gets Grp = 1.
--   * A later row inherits the Grp of the earliest preceding row that shares
--     either of its a/b values.
--   * If no preceding row shares a value, the row opens a new group (@maxgrp + 1).
DECLARE @T AS TABLE
(
    id int IDENTITY,
    Distance decimal(18, 10),
    a int,
    b int,
    Grp int
)

-- id is generated while inserting in Distance order; rows with equal Distance
-- are not ordered relative to each other by this ORDER BY.
INSERT INTO @T (Distance, a, b)
SELECT Distance, a, b
FROM MyTable
ORDER BY Distance

UPDATE @T
SET Grp = 1
WHERE id = 1

DECLARE @i int = 2, @max int, @min int,
        @grp int, @a int, @b int, @maxgrp int = 1

SELECT @max = MAX(id) FROM @T

WHILE @i <= @max
BEGIN
    SELECT @a = a, @b = b
    FROM @T
    WHERE id = @i

    -- Forget the previous iteration's result: the lookup below assigns nothing
    -- when @min is NULL, and a stale @grp would then be reused.
    SET @grp = NULL

    -- Earliest preceding row sharing a value with the current row.
    -- The parentheses are required: AND binds tighter than OR, so without them
    -- the "b IN (...)" test would not be restricted to id < @i.
    SELECT @min = MIN(id)
    FROM @T
    WHERE id < @i
      AND (a IN (@a, @b) OR b IN (@a, @b))

    SELECT @grp = Grp
    FROM @T
    WHERE id = @min

    -- No preceding match: start a new group.
    IF @grp IS NULL
    BEGIN
        SET @maxgrp = @maxgrp + 1
        SET @grp = @maxgrp
    END

    UPDATE @T
    SET Grp = @grp
    WHERE id = @i

    SET @i = @i + 1
END

SELECT * FROM @T
如果只需要Grp
分组,可以这样简化:
-- Cursor-based assignment of Grp.
-- Pass 1: walk the rows; each row takes the Grp of the earliest preceding row
--         that shares one of its a/b values, or its own id when there is none
--         (so provisional group numbers are unique but not consecutive).
-- Pass 2: renumber the provisional groups to 1, 2, 3, ...
declare @t table (id int identity, Distance decimal(18,10)
    , a int, b int, Grp int)

insert @t (Distance, a, b)
-- for real data, replace the VALUES list with:
-- select Distance, a, b From MyTable order by Distance
values
    (0.0,100,114),(0.1, 64,125),(0.1, 88,100),(0.1, 65,125),
    (0.1, 63, 64),(0.1, 65, 66),(0.2, 63, 66),(0.2, 10, 61),
    (0.2, 19, 61),(0.2, 30, 61),(0.2, 10, 65),(0.2, 10, 94),
    (0.2, 19, 65),(0.2, 19, 94),(0.2, 30, 94),(0.2, 60, 94),
    (0.2, 61, 94)

-- @d matches the Distance column type; @d and @g are fetched but not used.
declare @i int, @d decimal(18,10), @a int, @b int, @g int

-- NOTE(review): the cursor has no ORDER BY, so it relies on @t being scanned in
-- id order; confirm this holds for your data volume before relying on it.
declare c1 cursor for
    select id, Distance, a, b, Grp
    from @t
    for update of Grp
open c1

-- The first fetched row seeds group 1.
fetch next from c1 into @i, @d, @a, @b, @g
update @t set Grp = 1 where current of c1

fetch next from c1 into @i, @d, @a, @b, @g
while @@fetch_status = 0
begin
    update @t set Grp =
        isNull((select top 1 t2.Grp
                from @t t2
                where t2.id < @i
                  and (@a in (t2.a, t2.b)
                    or @b in (t2.a, t2.b))
                -- without ORDER BY, TOP 1 may return any matching row;
                -- the rule needs the earliest one
                order by t2.id)
              , @i)
    where current of c1

    fetch next from c1 into @i, @d, @a, @b, @g
end
close c1
deallocate c1

-- If you need consecutive Grp numbers ..
-- @u.id numbers the distinct provisional groups in ascending order.
declare @u table (id int identity, Grp int)
insert @u (Grp)
select distinct Grp from @t order by Grp

-- Only groups whose number actually changes are rewritten.
update @t set Grp = u.id
from @t t
join @u u on (u.Grp = t.Grp and u.Grp <> u.id)

select * from @t;
以下查询会产生您想要的输出。
-- t3: the rows that start a new group.
--   rnk = id of the earliest preceding row sharing a value with the row, or 0
--         when no preceding row shares one (the row is a "starter").
--   grp = ROW_NUMBER within each rnk partition; it is kept only for rnk = 0,
--         which numbers the starter rows 1, 2, 3, ... in id order.
WITH t3 AS (
    SELECT *
    FROM (
        SELECT id, Distance, a, b, rnk,
               CASE WHEN rnk > 0 THEN NULL ELSE grp END AS Grp,
               ROW_NUMBER() OVER (ORDER BY id) AS seq
        FROM (
            SELECT id, Distance, a, b, rnk,
                   ROW_NUMBER() OVER (PARTITION BY rnk ORDER BY id) AS grp
            FROM (
                SELECT id, Distance, a, b,
                       ISNULL((SELECT TOP 1 s2.id
                               FROM tb s2
                               WHERE s2.id < s1.id
                                 AND (s2.a = s1.a OR s2.b = s1.b OR s2.b = s1.a OR s2.a = s1.b)
                               -- TOP 1 needs ORDER BY to reliably return the earliest match
                               ORDER BY s2.id), 0) AS rnk
                FROM tb s1
            ) T
        ) T
    ) T
    WHERE Grp > 0
)
-- Non-starter rows (t1) are joined to their earliest matching predecessor (t2)
-- and then to every starter row sharing a value with either of them; the
-- smallest starter group wins. Starter rows are appended unchanged.
-- NOTE(review): only the row itself and that one predecessor are compared with
-- the starter rows, so longer chains do not appear to be followed - verify
-- against your data.
SELECT id, Distance, a, b, MIN(grp) AS Grp
FROM (
    SELECT DISTINCT *
    FROM (
        SELECT t1.id, t1.Distance, t1.a, t1.b, t3.grp
        FROM (
            SELECT id, Distance, a, b, grp,
                   ISNULL((SELECT TOP 1 s2.id
                           FROM tb s2
                           WHERE s2.id < s1.id
                             AND (s2.a = s1.a OR s2.b = s1.b OR s2.b = s1.a OR s2.a = s1.b)
                           ORDER BY s2.id), 0) AS rnk
            FROM tb s1
        ) t1
        JOIN tb t2 ON t1.rnk = t2.id
        JOIN t3 ON
            t1.a = t3.a OR t1.a = t3.b OR
            t1.b = t3.b OR t1.b = t3.a OR
            t2.b = t3.b OR t2.b = t3.a OR
            t2.a = t3.a OR t2.a = t3.b
    ) t1
    UNION ALL
    SELECT id, Distance, a, b, grp
    FROM t3
) T
GROUP BY id, Distance, a, b
ORDER BY id
演示在 db<>fiddle
使用递归而不是循环的答案。
首先,找出每个值(来自 a 或 b)首次出现的行...
-- #node holds one row per (row, value) pair in which the value makes its first
-- appearance in #example:
--   row_id     - id of the #example row
--   new_val    - the value that has not occurred in any earlier row
--   old_val    - the other value of the same row
--   link_count - how many of the row's two values are new (1 or 2)
CREATE TABLE #node (
    row_id     INT,
    old_val    INT,
    new_val    INT,
    link_count INT,
    INDEX node_old_new CLUSTERED (link_count, old_val, new_val, row_id)
);

INSERT INTO #node (row_id, old_val, new_val, link_count)
SELECT
    e.id,
    twinned.old_val,
    twinned.new_val,
    -- counted after the WHERE filter, i.e. over the surviving pairs of this row
    COUNT(*) OVER (PARTITION BY e.id)
FROM
    #example AS e
CROSS APPLY
(
    -- each row is looked at in both directions: (a -> b) and (b -> a)
    SELECT e.a AS old_val, e.b AS new_val
    UNION ALL
    SELECT e.b AS old_val, e.a AS new_val
)
    AS twinned
WHERE
    -- keep the pair only when new_val occurs in no earlier row
    NOT EXISTS (
        SELECT *
        FROM #example AS lookup
        WHERE twinned.new_val IN (lookup.a, lookup.b)
          AND lookup.id < e.id
    )
;
当 link_count = 2
时,a
和 b
都是第一次出现在这一行,表示这一行将开始一个新的分组。
当 link_count = 1
时,new_val
以前从未出现过,但 old_val
已经出现过。因此,一旦 old_val
被分配了一个组,我们就可以将该组传播到 new_val
。
接下来只需通过树遍历来构建闭包表。
-- Builds #closure: for every value, the group it belongs to and the row in
-- which it first appeared.
--   anchor    - rows where both values are new (link_count = 2) each start a
--               group; DENSE_RANK gives both values of such a row the same grp.
--   recursion - a value first seen next to an already-grouped value
--               (link_count = 1) inherits that value's grp.
-- The result is materialised with SELECT ... INTO because a CTE is visible only
-- to the single statement it is attached to.
-- NOTE(review): SQL Server's default recursion limit is 100 levels; append
-- OPTION (MAXRECURSION n) if chains can be deeper - confirm for your data.
WITH
closure AS
(
    SELECT
        new_val                              AS val,
        DENSE_RANK() OVER (ORDER BY row_id)  AS grp,
        row_id                               AS row_id,
        0                                    AS depth
    FROM
        #node
    WHERE
        link_count = 2

    UNION ALL

    SELECT
        r.new_val,
        c.grp,
        r.row_id,
        c.depth + 1
    FROM
        closure AS c
    INNER JOIN
        #node AS r
            ON  r.old_val = c.val
            AND r.link_count = 1
)
SELECT
    val,
    grp,
    row_id,
    depth
INTO
    #closure
FROM
    closure
;
现在,对于 a
或 b
中的任何值,我们可以在闭包 table 中查找该值的组。我们可能会得到两个不同的组,一组来自查找 a
,一组来自查找 b
;所以,我们取从最早的行开始分配的组。
-- For every #example row, look up its two values in #closure and keep the
-- group that belongs to the value with the lowest row_id.
-- Rows with no value in #closure are dropped by CROSS APPLY.
SELECT
    ex.*,
    first_hit.grp
FROM #example AS ex
CROSS APPLY (
    SELECT TOP (1)
        cl.grp
    FROM #closure AS cl
    WHERE cl.val = ex.a
       OR cl.val = ex.b
    ORDER BY cl.row_id ASC
) AS first_hit
ORDER BY ex.id
我的 SQL Server 表如下所示:
id Distance a b Grp
--------------------------------
1 0.0000000000 100 114 NULL
2 0.1000000000 64 125 NULL
3 0.1000000000 88 100 NULL
4 0.1000000000 65 125 NULL
5 0.1000000000 63 64 NULL
6 0.1000000000 65 66 NULL
7 0.2000000000 63 66 NULL
8 0.2000000000 10 61 NULL
9 0.2000000000 19 61 NULL
10 0.2000000000 30 61 NULL
11 0.2000000000 10 65 NULL
12 0.2000000000 10 94 NULL
13 0.2000000000 19 65 NULL
14 0.2000000000 19 94 NULL
15 0.2000000000 30 94 NULL
16 0.2000000000 60 94 NULL
17 0.2000000000 61 94 NULL
Grp
栏应填写如下
第一条记录的 Grp
是 1
如果当前行的 a 或 b 的值出现在前面的任何一行中,那么它将采用其中最早一行的 Grp
值
如果当前行的 a 和 b 的值都没有出现在前面的任何行中,则 Grp
值将是 max Grp + 1
如果记录 id = 3 那么 b = 100 的值存在于前面的行中,它出现的第一个是 id = 1,即 Grp = 1
所以 Grp
对于 id 3
将是 1
我的 table 应该是这样的:
id Distance a b Grp
--------------------------------
1 0.0000000000 100 114 1
2 0.1000000000 64 125 2
3 0.1000000000 88 100 1
4 0.1000000000 65 125 2
5 0.1000000000 63 64 2
6 0.1000000000 65 66 2
7 0.2000000000 63 66 2
8 0.2000000000 10 61 3
9 0.2000000000 19 61 3
10 0.2000000000 30 61 3
11 0.2000000000 10 65 2
12 0.2000000000 10 94 3
13 0.2000000000 19 65 2
14 0.2000000000 19 94 3
15 0.2000000000 30 94 3
16 0.2000000000 60 94 3
17 0.2000000000 61 94 3
我已经构建了这个运行良好的脚本,但是它非常慢,我有什么办法可以让它变得更好(没有循环)?
-- Assigns a group number (Grp) to every row of MyTable, processed in Distance order.
--   * Row 1 always gets Grp = 1.
--   * A later row inherits the Grp of the earliest preceding row that shares
--     either of its a/b values.
--   * If no preceding row shares a value, the row opens a new group (@maxgrp + 1).
DECLARE @T AS TABLE
(
    id int IDENTITY,
    Distance decimal(18, 10),
    a int,
    b int,
    Grp int
)

-- id is generated while inserting in Distance order; rows with equal Distance
-- are not ordered relative to each other by this ORDER BY.
INSERT INTO @T (Distance, a, b)
SELECT Distance, a, b
FROM MyTable
ORDER BY Distance

UPDATE @T
SET Grp = 1
WHERE id = 1

DECLARE @i int = 2, @max int, @min int,
        @grp int, @a int, @b int, @maxgrp int = 1

SELECT @max = MAX(id) FROM @T

WHILE @i <= @max
BEGIN
    SELECT @a = a, @b = b
    FROM @T
    WHERE id = @i

    -- Forget the previous iteration's result: the lookup below assigns nothing
    -- when @min is NULL, and a stale @grp would then be reused.
    SET @grp = NULL

    -- Earliest preceding row sharing a value with the current row.
    -- The parentheses are required: AND binds tighter than OR, so without them
    -- the "b IN (...)" test would not be restricted to id < @i.
    SELECT @min = MIN(id)
    FROM @T
    WHERE id < @i
      AND (a IN (@a, @b) OR b IN (@a, @b))

    SELECT @grp = Grp
    FROM @T
    WHERE id = @min

    -- No preceding match: start a new group.
    IF @grp IS NULL
    BEGIN
        SET @maxgrp = @maxgrp + 1
        SET @grp = @maxgrp
    END

    UPDATE @T
    SET Grp = @grp
    WHERE id = @i

    SET @i = @i + 1
END

SELECT * FROM @T
如果只需要Grp
分组,可以这样简化:
-- Cursor-based assignment of Grp.
-- Pass 1: walk the rows; each row takes the Grp of the earliest preceding row
--         that shares one of its a/b values, or its own id when there is none
--         (so provisional group numbers are unique but not consecutive).
-- Pass 2: renumber the provisional groups to 1, 2, 3, ...
declare @t table (id int identity, Distance decimal(18,10)
    , a int, b int, Grp int)

insert @t (Distance, a, b)
-- for real data, replace the VALUES list with:
-- select Distance, a, b From MyTable order by Distance
values
    (0.0,100,114),(0.1, 64,125),(0.1, 88,100),(0.1, 65,125),
    (0.1, 63, 64),(0.1, 65, 66),(0.2, 63, 66),(0.2, 10, 61),
    (0.2, 19, 61),(0.2, 30, 61),(0.2, 10, 65),(0.2, 10, 94),
    (0.2, 19, 65),(0.2, 19, 94),(0.2, 30, 94),(0.2, 60, 94),
    (0.2, 61, 94)

-- @d matches the Distance column type; @d and @g are fetched but not used.
declare @i int, @d decimal(18,10), @a int, @b int, @g int

-- NOTE(review): the cursor has no ORDER BY, so it relies on @t being scanned in
-- id order; confirm this holds for your data volume before relying on it.
declare c1 cursor for
    select id, Distance, a, b, Grp
    from @t
    for update of Grp
open c1

-- The first fetched row seeds group 1.
fetch next from c1 into @i, @d, @a, @b, @g
update @t set Grp = 1 where current of c1

fetch next from c1 into @i, @d, @a, @b, @g
while @@fetch_status = 0
begin
    update @t set Grp =
        isNull((select top 1 t2.Grp
                from @t t2
                where t2.id < @i
                  and (@a in (t2.a, t2.b)
                    or @b in (t2.a, t2.b))
                -- without ORDER BY, TOP 1 may return any matching row;
                -- the rule needs the earliest one
                order by t2.id)
              , @i)
    where current of c1

    fetch next from c1 into @i, @d, @a, @b, @g
end
close c1
deallocate c1

-- If you need consecutive Grp numbers ..
-- @u.id numbers the distinct provisional groups in ascending order.
declare @u table (id int identity, Grp int)
insert @u (Grp)
select distinct Grp from @t order by Grp

-- Only groups whose number actually changes are rewritten.
update @t set Grp = u.id
from @t t
join @u u on (u.Grp = t.Grp and u.Grp <> u.id)

select * from @t;
以下查询会产生您想要的输出。
-- t3: the rows that start a new group.
--   rnk = id of the earliest preceding row sharing a value with the row, or 0
--         when no preceding row shares one (the row is a "starter").
--   grp = ROW_NUMBER within each rnk partition; it is kept only for rnk = 0,
--         which numbers the starter rows 1, 2, 3, ... in id order.
WITH t3 AS (
    SELECT *
    FROM (
        SELECT id, Distance, a, b, rnk,
               CASE WHEN rnk > 0 THEN NULL ELSE grp END AS Grp,
               ROW_NUMBER() OVER (ORDER BY id) AS seq
        FROM (
            SELECT id, Distance, a, b, rnk,
                   ROW_NUMBER() OVER (PARTITION BY rnk ORDER BY id) AS grp
            FROM (
                SELECT id, Distance, a, b,
                       ISNULL((SELECT TOP 1 s2.id
                               FROM tb s2
                               WHERE s2.id < s1.id
                                 AND (s2.a = s1.a OR s2.b = s1.b OR s2.b = s1.a OR s2.a = s1.b)
                               -- TOP 1 needs ORDER BY to reliably return the earliest match
                               ORDER BY s2.id), 0) AS rnk
                FROM tb s1
            ) T
        ) T
    ) T
    WHERE Grp > 0
)
-- Non-starter rows (t1) are joined to their earliest matching predecessor (t2)
-- and then to every starter row sharing a value with either of them; the
-- smallest starter group wins. Starter rows are appended unchanged.
-- NOTE(review): only the row itself and that one predecessor are compared with
-- the starter rows, so longer chains do not appear to be followed - verify
-- against your data.
SELECT id, Distance, a, b, MIN(grp) AS Grp
FROM (
    SELECT DISTINCT *
    FROM (
        SELECT t1.id, t1.Distance, t1.a, t1.b, t3.grp
        FROM (
            SELECT id, Distance, a, b, grp,
                   ISNULL((SELECT TOP 1 s2.id
                           FROM tb s2
                           WHERE s2.id < s1.id
                             AND (s2.a = s1.a OR s2.b = s1.b OR s2.b = s1.a OR s2.a = s1.b)
                           ORDER BY s2.id), 0) AS rnk
            FROM tb s1
        ) t1
        JOIN tb t2 ON t1.rnk = t2.id
        JOIN t3 ON
            t1.a = t3.a OR t1.a = t3.b OR
            t1.b = t3.b OR t1.b = t3.a OR
            t2.b = t3.b OR t2.b = t3.a OR
            t2.a = t3.a OR t2.a = t3.b
    ) t1
    UNION ALL
    SELECT id, Distance, a, b, grp
    FROM t3
) T
GROUP BY id, Distance, a, b
ORDER BY id
演示在 db<>fiddle
使用递归而不是循环的答案。
首先,找出每个值(来自 a 或 b)首次出现的行...
-- #node holds one row per (row, value) pair in which the value makes its first
-- appearance in #example:
--   row_id     - id of the #example row
--   new_val    - the value that has not occurred in any earlier row
--   old_val    - the other value of the same row
--   link_count - how many of the row's two values are new (1 or 2)
CREATE TABLE #node (
    row_id     INT,
    old_val    INT,
    new_val    INT,
    link_count INT,
    INDEX node_old_new CLUSTERED (link_count, old_val, new_val, row_id)
);

INSERT INTO #node (row_id, old_val, new_val, link_count)
SELECT
    e.id,
    twinned.old_val,
    twinned.new_val,
    -- counted after the WHERE filter, i.e. over the surviving pairs of this row
    COUNT(*) OVER (PARTITION BY e.id)
FROM
    #example AS e
CROSS APPLY
(
    -- each row is looked at in both directions: (a -> b) and (b -> a)
    SELECT e.a AS old_val, e.b AS new_val
    UNION ALL
    SELECT e.b AS old_val, e.a AS new_val
)
    AS twinned
WHERE
    -- keep the pair only when new_val occurs in no earlier row
    NOT EXISTS (
        SELECT *
        FROM #example AS lookup
        WHERE twinned.new_val IN (lookup.a, lookup.b)
          AND lookup.id < e.id
    )
;
当 link_count = 2
时,a
和 b
都是第一次出现在这一行,表示这一行将开始一个新的分组。
当 link_count = 1
时,new_val
以前从未出现过,但 old_val
已经出现过。因此,一旦 old_val
被分配了一个组,我们就可以将该组传播到 new_val
。
接下来只需通过树遍历来构建闭包表。
-- Builds #closure: for every value, the group it belongs to and the row in
-- which it first appeared.
--   anchor    - rows where both values are new (link_count = 2) each start a
--               group; DENSE_RANK gives both values of such a row the same grp.
--   recursion - a value first seen next to an already-grouped value
--               (link_count = 1) inherits that value's grp.
-- The result is materialised with SELECT ... INTO because a CTE is visible only
-- to the single statement it is attached to.
-- NOTE(review): SQL Server's default recursion limit is 100 levels; append
-- OPTION (MAXRECURSION n) if chains can be deeper - confirm for your data.
WITH
closure AS
(
    SELECT
        new_val                              AS val,
        DENSE_RANK() OVER (ORDER BY row_id)  AS grp,
        row_id                               AS row_id,
        0                                    AS depth
    FROM
        #node
    WHERE
        link_count = 2

    UNION ALL

    SELECT
        r.new_val,
        c.grp,
        r.row_id,
        c.depth + 1
    FROM
        closure AS c
    INNER JOIN
        #node AS r
            ON  r.old_val = c.val
            AND r.link_count = 1
)
SELECT
    val,
    grp,
    row_id,
    depth
INTO
    #closure
FROM
    closure
;
现在,对于 a
或 b
中的任何值,我们可以在闭包 table 中查找该值的组。我们可能会得到两个不同的组,一组来自查找 a
,一组来自查找 b
;所以,我们取从最早的行开始分配的组。
-- For every #example row, look up its two values in #closure and keep the
-- group that belongs to the value with the lowest row_id.
-- Rows with no value in #closure are dropped by CROSS APPLY.
SELECT
    ex.*,
    first_hit.grp
FROM #example AS ex
CROSS APPLY (
    SELECT TOP (1)
        cl.grp
    FROM #closure AS cl
    WHERE cl.val = ex.a
       OR cl.val = ex.b
    ORDER BY cl.row_id ASC
) AS first_hit
ORDER BY ex.id