Select 行来自基于相关节点的层次结构
Select rows from hierarchy based on related nodes
我有一个self-referencingtableFoo
[Id] int NOT NULL,
[ParentId] int NULL, --Foreign key to [Id]
[Type] char(1) NOT NULL
[Id]
是聚簇主键,在 [ParentId]
和 [Type]
列上建立索引。
假设层次结构的最大深度为 1(child 个节点不能有 child 个节点)。
我想获取满足以下条件的 Foo 的所有行:
- 类型是 A
- 家谱中有 B
- 在它的家谱中有 C 或 D
下面的查询使用了 JOIN 的 returns 想要的结果,但是性能很糟糕
SELECT DISTINCT [Main].*
FROM Foo AS [Main]
--[Main] may not be root node
LEFT OUTER JOIN Foo AS [Parent]
ON [Parent].[Id] = [Main].[ParentId]
--Must have a B in tree
INNER JOIN Foo AS [NodeB]
ON (
[NodeB].[Pid] = [Main].[Pid] --Sibling
OR [NodeB].[ParentId] = [Main].[Id] --Child
OR [NodeB].[Id] = [Parent].[Id] --Parent
)
AND [NodeB].[Type] = 'B'
--Must have a C or D in tree
INNER JOIN Foo AS [NodeCD]
ON (
[NodeCD].[Pid] = [Main].[Pid] --Sibling
OR [NodeCD].[ParentId] = [Main].[Id] --Child
OR [NodeCD].[Id] = [Parent].[Id] --Parent
)
AND [NodeCD].[Type] IN ('C', 'D')
WHERE [Main].[Type] = 'A'
根据仅限于查看 650,000 行中的前 10,000 行的实际执行计划
如果我从查询中删除 --Parent 行
OR [NodeB].[Id] = [Parent].[Id] --Parent
OR [NodeCD].[Id] = [Parent].[Id] --Parent
然后执行几乎是瞬时的,但它错过了 A 是 child 并且只有一个兄弟
的情况
Misses this: Catches this:
B B
├A ├A
└C ├B
└C
我试图想出一个 CTE 来做这件事,因为它在性能方面似乎更有希望,但我一直无法弄清楚如何排除那些不满足标准的树。
到目前为止的 CTE
WITH [Parent] AS
(
SELECT *
FROM [Foo]
WHERE [ParentId] IS NULL
UNION ALL
SELECT [Child].*
FROM Foo AS [Child]
JOIN [Parent]
ON [Child].[ParentId] = [Parent].Id
WHERE [Child].[Type] = 'P'
UNION ALL
SELECT [ChildCD].*
FROM Foo AS [ChildCD]
JOIN [Parent]
ON [ChildCD].[ParentId] = [Parent].Id
WHERE [ChildCD].[Type] IN ('C', 'D')
)
SELECT *
FROM [Parent]
WHERE [Type] = 'I';
但是,如果我尝试添加 Sibling-Child-Parent OR 语句,我会达到最大递归级别 100。
用这组数据优化它并不容易,但也许可以试试这个。 LEFT OUTER JOIN 似乎是多余的。此外,执行计划不会在内部循环中显示 96% 的命中率。
SELECT DISTINCT [Main].*
FROM Foo AS [Main]
--Must have a B in tree
INNER JOIN Foo AS [NodeB]
ON (
[NodeB].[ParentId] = [Main].[ParentId] --Sibling
OR [NodeB].[ParentId] = [Main].[Id] --Child
OR [NodeB].[Id] = [Main].[ParentId] --Parent
)
AND [NodeB].[Type] = 'B'
--Must have a C or D in tree
INNER JOIN Foo AS [NodeCD]
ON (
[NodeCD].[ParentId] = [Main].[ParentId] --Sibling
OR [NodeCD].[ParentId] = [Main].[Id] --Child
OR [NodeCD].[Id] = [Main].[ParentId] --Parent
)
AND [NodeCD].[Type] IN ('C', 'D')
WHERE [Main].[Type] = 'A'
请post你的结果。希望这会有所帮助。
哎呀,这比我想象的要长,肯定有更好的方法,但这是我的看法:
WITH CTE AS
(
SELECT Id, ParentId FamilyId, [Type]
FROM dbo.Foo
UNION
SELECT A.Id, B.Id, B.[Type]
FROM dbo.Foo A
INNER JOIN dbo.Foo B
ON A.ParentId = B.Id
)
SELECT DISTINCT B.Id
FROM CTE A
INNER JOIN dbo.Foo B
ON A.Id = B.Id
OR A.FamilyId = B.Id
WHERE B.[Type] = 'A'
AND EXISTS( SELECT 1 FROM CTE
WHERE FamilyId = A.FamilyId
AND [Type] = 'B')
AND EXISTS( SELECT 1 FROM CTE
WHERE FamilyId = A.FamilyId
AND [Type] IN ('C','D'));
Here is the modified sqlfiddle.
这样的怎么样?
select
[F].[Id]
from
[Foo] [F]
where
[F].[Type] = 'A' and
(
(
[F].[ParentId] is null and
exists (select 1 from [Foo] [Child] where [F].[Id] = [Child].[ParentId] and [Child].[Type] = 'B') and
exists (select 1 from [Foo] [Child] where [F].[Id] = [Child].[ParentId] and [Child].[Type] in ('C', 'D'))
) or
(
[F].[ParentId] is not null and
exists (select 1 from [Foo] [ParentOrSibling] where [F].[ParentId] in ([ParentOrSibling].[Id], [ParentOrSibling].[ParentId]) and [ParentOrSibling].[Type] = 'B') and
exists (select 1 from [Foo] [ParentOrSibling] where [F].[ParentId] in ([ParentOrSibling].[Id], [ParentOrSibling].[ParentId]) and [ParentOrSibling].[Type] in ('C', 'D'))
)
);
使用递归 CTE。这适用于任何多级层次结构:
DECLARE @t TABLE
(
ID INT ,
ParentID INT ,
Type CHAR(1)
)
INSERT INTO @t
VALUES ( 1, NULL, 'A' ),
( 2, 1, 'B' ),
( 3, NULL, 'C' ),
( 4, NULL, 'A' ),
( 5, 4, 'B' ),
( 6, 4, 'C' ),
( 7, NULL, 'A' ),
( 8, 7, 'B' ),
( 9, 8, 'D' ),
( 10, NULL, 'D' ),
( 11, 10, 'A' ),
( 12, 11, 'B' ),
( 13, 8, 'D' );
WITH cte1
AS ( SELECT ID ,
ParentID ,
Type ,
ID AS GroupID ,
0 AS B ,
0 AS CD
FROM @t
WHERE Type = 'A'
UNION ALL
SELECT t.ID ,
t.ParentID ,
t.Type ,
c.GroupID ,
CASE WHEN t.Type = 'B' THEN 1
ELSE 0
END ,
CASE WHEN t.Type IN ( 'C', 'D' ) THEN 1
ELSE 0
END
FROM @t t
JOIN cte1 c ON t.ParentID = c.ID
),
cte2
AS ( SELECT ID ,
ParentID ,
Type ,
ID AS GroupID ,
0 AS B ,
0 AS CD
FROM @t
WHERE Type = 'A'
UNION ALL
SELECT t.ID ,
t.ParentID ,
t.Type ,
c.GroupID ,
CASE WHEN t.Type = 'B' THEN 1
ELSE 0
END ,
CASE WHEN t.Type IN ( 'C', 'D' ) THEN 1
ELSE 0
END
FROM @t t
JOIN cte2 c ON t.ID = c.ParentID
),
filter
AS ( SELECT ID ,
Type ,
SUM(B) OVER ( PARTITION BY GroupID ) AS B ,
SUM(CD) OVER ( PARTITION BY GroupID ) AS CD
FROM ( SELECT *
FROM cte1
UNION
SELECT *
FROM cte2
) t
)
SELECT t.*
FROM filter f
JOIN @t t ON t.ID = f.ID
WHERE f.Type = 'A'
AND B > 0
AND cd > 0
输出:
ID ParentID Type
4 NULL A
7 NULL A
11 10 A
被检查的节点是根节点的情况与它是子节点的情况截然不同,您最好分别查询这两个节点并形成 UNION ALL
两套。但是,您可以使用一个通用的 table 表达式来简化,该表达式标识那些包含您要查找的节点的树。总的来说,这可能看起来像这样:
WITH [TargetFamilies] AS (
SELECT
COALESCE(ParentId, Id) AS FamilyId
FROM Foo
GROUP BY COALESCE(ParentId, Id)
HAVING
COUNT(CASE Type WHEN 'B' THEN 1 END) > 0
AND COUNT(CASE Type WHEN 'C' THEN 1 WHEN 'D' THEN 1 END) > 0
)
-- root nodes
SELECT [Main].*
FROM
Foo AS [Main]
JOIN [TargetFamilies] ON [Main].Id = [TargetFamilies].FamilyId
WHERE
[Main].Type = 'A'
UNION ALL
-- child nodes
SELECT
[Main].*
FROM
Foo AS [Main]
JOIN [TargetFamilies] ON [Main].ParentId = [TargetFamilies].FamilyId
WHERE
[Main].Type = 'A'
我无法预测效率,但这是另一个解决方案:
SELECT *
FROM Foo AS f
WHERE Type = 'A'
AND ParentId IS NULL
AND EXISTS
( SELECT *
FROM Foo AS ch
WHERE ch.ParentId = f.Id
AND ch.Type = 'B'
)
AND EXISTS
( SELECT *
FROM Foo AS ch
WHERE ch.ParentId = f.Id
AND ch.Type IN ('C', 'D')
)
UNION ALL
SELECT *
FROM Foo AS f
WHERE Type = 'A'
AND ParentId IS NOT NULL
AND EXISTS
( SELECT 1
FROM
( SELECT *
FROM (VALUES ('B'), ('C'), ('D')) AS x (Type)
EXCEPT
SELECT p.Type
FROM Foo AS p
WHERE f.ParentId = p.Id
EXCEPT
SELECT sib.Type
FROM Foo AS sib
WHERE f.ParentId = sib.ParentId
) AS x
HAVING MIN(Type) = MAX(Type) AND MIN(Type) <> 'B'
OR MIN(Type) IS NULL
) ;
中测试
我有一个self-referencingtableFoo
[Id] int NOT NULL,
[ParentId] int NULL, --Foreign key to [Id]
[Type] char(1) NOT NULL
[Id]
是聚簇主键,在 [ParentId]
和 [Type]
列上建立索引。
假设层次结构的最大深度为 1(child 个节点不能有 child 个节点)。
我想获取满足以下条件的 Foo 的所有行:
- 类型是 A
- 家谱中有 B
- 在它的家谱中有 C 或 D
下面的查询使用了 JOIN 的 returns 想要的结果,但是性能很糟糕
SELECT DISTINCT [Main].*
FROM Foo AS [Main]
--[Main] may not be root node
LEFT OUTER JOIN Foo AS [Parent]
ON [Parent].[Id] = [Main].[ParentId]
--Must have a B in tree
INNER JOIN Foo AS [NodeB]
ON (
[NodeB].[Pid] = [Main].[Pid] --Sibling
OR [NodeB].[ParentId] = [Main].[Id] --Child
OR [NodeB].[Id] = [Parent].[Id] --Parent
)
AND [NodeB].[Type] = 'B'
--Must have a C or D in tree
INNER JOIN Foo AS [NodeCD]
ON (
[NodeCD].[Pid] = [Main].[Pid] --Sibling
OR [NodeCD].[ParentId] = [Main].[Id] --Child
OR [NodeCD].[Id] = [Parent].[Id] --Parent
)
AND [NodeCD].[Type] IN ('C', 'D')
WHERE [Main].[Type] = 'A'
根据仅限于查看 650,000 行中的前 10,000 行的实际执行计划
如果我从查询中删除 --Parent 行
OR [NodeB].[Id] = [Parent].[Id] --Parent
OR [NodeCD].[Id] = [Parent].[Id] --Parent
然后执行几乎是瞬时的,但它错过了 A 是 child 并且只有一个兄弟
的情况Misses this: Catches this:
B B
├A ├A
└C ├B
└C
我试图想出一个 CTE 来做这件事,因为它在性能方面似乎更有希望,但我一直无法弄清楚如何排除那些不满足标准的树。
到目前为止的 CTE
WITH [Parent] AS
(
SELECT *
FROM [Foo]
WHERE [ParentId] IS NULL
UNION ALL
SELECT [Child].*
FROM Foo AS [Child]
JOIN [Parent]
ON [Child].[ParentId] = [Parent].Id
WHERE [Child].[Type] = 'P'
UNION ALL
SELECT [ChildCD].*
FROM Foo AS [ChildCD]
JOIN [Parent]
ON [ChildCD].[ParentId] = [Parent].Id
WHERE [ChildCD].[Type] IN ('C', 'D')
)
SELECT *
FROM [Parent]
WHERE [Type] = 'I';
但是,如果我尝试添加 Sibling-Child-Parent OR 语句,我会达到最大递归级别 100。
用这组数据优化它并不容易,但也许可以试试这个。 LEFT OUTER JOIN 似乎是多余的。此外,执行计划不会在内部循环中显示 96% 的命中率。
SELECT DISTINCT [Main].*
FROM Foo AS [Main]
--Must have a B in tree
INNER JOIN Foo AS [NodeB]
ON (
[NodeB].[ParentId] = [Main].[ParentId] --Sibling
OR [NodeB].[ParentId] = [Main].[Id] --Child
OR [NodeB].[Id] = [Main].[ParentId] --Parent
)
AND [NodeB].[Type] = 'B'
--Must have a C or D in tree
INNER JOIN Foo AS [NodeCD]
ON (
[NodeCD].[ParentId] = [Main].[ParentId] --Sibling
OR [NodeCD].[ParentId] = [Main].[Id] --Child
OR [NodeCD].[Id] = [Main].[ParentId] --Parent
)
AND [NodeCD].[Type] IN ('C', 'D')
WHERE [Main].[Type] = 'A'
请post你的结果。希望这会有所帮助。
哎呀,这比我想象的要长,肯定有更好的方法,但这是我的看法:
WITH CTE AS
(
SELECT Id, ParentId FamilyId, [Type]
FROM dbo.Foo
UNION
SELECT A.Id, B.Id, B.[Type]
FROM dbo.Foo A
INNER JOIN dbo.Foo B
ON A.ParentId = B.Id
)
SELECT DISTINCT B.Id
FROM CTE A
INNER JOIN dbo.Foo B
ON A.Id = B.Id
OR A.FamilyId = B.Id
WHERE B.[Type] = 'A'
AND EXISTS( SELECT 1 FROM CTE
WHERE FamilyId = A.FamilyId
AND [Type] = 'B')
AND EXISTS( SELECT 1 FROM CTE
WHERE FamilyId = A.FamilyId
AND [Type] IN ('C','D'));
Here is the modified sqlfiddle.
这样的怎么样?
select
[F].[Id]
from
[Foo] [F]
where
[F].[Type] = 'A' and
(
(
[F].[ParentId] is null and
exists (select 1 from [Foo] [Child] where [F].[Id] = [Child].[ParentId] and [Child].[Type] = 'B') and
exists (select 1 from [Foo] [Child] where [F].[Id] = [Child].[ParentId] and [Child].[Type] in ('C', 'D'))
) or
(
[F].[ParentId] is not null and
exists (select 1 from [Foo] [ParentOrSibling] where [F].[ParentId] in ([ParentOrSibling].[Id], [ParentOrSibling].[ParentId]) and [ParentOrSibling].[Type] = 'B') and
exists (select 1 from [Foo] [ParentOrSibling] where [F].[ParentId] in ([ParentOrSibling].[Id], [ParentOrSibling].[ParentId]) and [ParentOrSibling].[Type] in ('C', 'D'))
)
);
使用递归 CTE。这适用于任何多级层次结构:
DECLARE @t TABLE
(
ID INT ,
ParentID INT ,
Type CHAR(1)
)
INSERT INTO @t
VALUES ( 1, NULL, 'A' ),
( 2, 1, 'B' ),
( 3, NULL, 'C' ),
( 4, NULL, 'A' ),
( 5, 4, 'B' ),
( 6, 4, 'C' ),
( 7, NULL, 'A' ),
( 8, 7, 'B' ),
( 9, 8, 'D' ),
( 10, NULL, 'D' ),
( 11, 10, 'A' ),
( 12, 11, 'B' ),
( 13, 8, 'D' );
WITH cte1
AS ( SELECT ID ,
ParentID ,
Type ,
ID AS GroupID ,
0 AS B ,
0 AS CD
FROM @t
WHERE Type = 'A'
UNION ALL
SELECT t.ID ,
t.ParentID ,
t.Type ,
c.GroupID ,
CASE WHEN t.Type = 'B' THEN 1
ELSE 0
END ,
CASE WHEN t.Type IN ( 'C', 'D' ) THEN 1
ELSE 0
END
FROM @t t
JOIN cte1 c ON t.ParentID = c.ID
),
cte2
AS ( SELECT ID ,
ParentID ,
Type ,
ID AS GroupID ,
0 AS B ,
0 AS CD
FROM @t
WHERE Type = 'A'
UNION ALL
SELECT t.ID ,
t.ParentID ,
t.Type ,
c.GroupID ,
CASE WHEN t.Type = 'B' THEN 1
ELSE 0
END ,
CASE WHEN t.Type IN ( 'C', 'D' ) THEN 1
ELSE 0
END
FROM @t t
JOIN cte2 c ON t.ID = c.ParentID
),
filter
AS ( SELECT ID ,
Type ,
SUM(B) OVER ( PARTITION BY GroupID ) AS B ,
SUM(CD) OVER ( PARTITION BY GroupID ) AS CD
FROM ( SELECT *
FROM cte1
UNION
SELECT *
FROM cte2
) t
)
SELECT t.*
FROM filter f
JOIN @t t ON t.ID = f.ID
WHERE f.Type = 'A'
AND B > 0
AND cd > 0
输出:
ID ParentID Type
4 NULL A
7 NULL A
11 10 A
被检查的节点是根节点的情况与它是子节点的情况截然不同,您最好分别查询这两个节点并形成 UNION ALL
两套。但是,您可以使用一个通用的 table 表达式来简化,该表达式标识那些包含您要查找的节点的树。总的来说,这可能看起来像这样:
WITH [TargetFamilies] AS (
SELECT
COALESCE(ParentId, Id) AS FamilyId
FROM Foo
GROUP BY COALESCE(ParentId, Id)
HAVING
COUNT(CASE Type WHEN 'B' THEN 1 END) > 0
AND COUNT(CASE Type WHEN 'C' THEN 1 WHEN 'D' THEN 1 END) > 0
)
-- root nodes
SELECT [Main].*
FROM
Foo AS [Main]
JOIN [TargetFamilies] ON [Main].Id = [TargetFamilies].FamilyId
WHERE
[Main].Type = 'A'
UNION ALL
-- child nodes
SELECT
[Main].*
FROM
Foo AS [Main]
JOIN [TargetFamilies] ON [Main].ParentId = [TargetFamilies].FamilyId
WHERE
[Main].Type = 'A'
我无法预测效率,但这是另一个解决方案:
SELECT *
FROM Foo AS f
WHERE Type = 'A'
AND ParentId IS NULL
AND EXISTS
( SELECT *
FROM Foo AS ch
WHERE ch.ParentId = f.Id
AND ch.Type = 'B'
)
AND EXISTS
( SELECT *
FROM Foo AS ch
WHERE ch.ParentId = f.Id
AND ch.Type IN ('C', 'D')
)
UNION ALL
SELECT *
FROM Foo AS f
WHERE Type = 'A'
AND ParentId IS NOT NULL
AND EXISTS
( SELECT 1
FROM
( SELECT *
FROM (VALUES ('B'), ('C'), ('D')) AS x (Type)
EXCEPT
SELECT p.Type
FROM Foo AS p
WHERE f.ParentId = p.Id
EXCEPT
SELECT sib.Type
FROM Foo AS sib
WHERE f.ParentId = sib.ParentId
) AS x
HAVING MIN(Type) = MAX(Type) AND MIN(Type) <> 'B'
OR MIN(Type) IS NULL
) ;
中测试