SQL:优化递归 CTE
SQL: Optimizing Recursive CTE
示例table结构:
EmployeeId TeamleaderId TopTeamleaderId LEVEL ParentTree CompanyId
1 0 0 0 NULL 1
2 1 1 1 2>1 1
3 2 1 2 3>2>1 1
TeamleaderId
是 foreignKey
在同一个 table
中对 EmployeeId
的引用
目标:
每当使用 EmployeeId
、TeamleaderId
、CompanyId
在 table 中插入一行时,自动填充 TopTeamleaderId
、LEVEL
和 ParentTree
AFTER INSERT
触发
代码:
WITH CTE AS (
SELECT EmployeeId, TeamleaderId,0 AS [Level], CAST(EmployeeId AS varchar(100)) AS Heirarchy, TopTeamleaderId
FROM dbo.Employee
WHERE EmployeeId IN (SELECT EmployeeId FROM Employee WHERE TeamleaderId IS NULL
AND CompanyId IN(SELECT DISTINCT CompanyId FROM INSERTED))
UNION ALL
SELECT mgr.EmployeeId, mgr.TeamleaderId, CTE.[Level] +1 AS [Level],
CAST(( CAST(mgr.EmployeeId AS VARCHAR(100)) + '>' + CTE.Heirarchy) AS varchar(100)) AS Heirarchy, CTE.TopTeamleaderId
FROM CTE
INNER JOIN dbo.Employee AS mgr
ON TaskCTE.EmployeeId = mgr.ParentTeamleaderId
)
UPDATE Employee SET [LEVEL] = TC.[LEVEL], ParentTree = TC.Heirarchy, TopTeamleaderId = TC.TopTeamleaderId
FROM dbo.Employee AS Employee
JOIN (SELECT * FROM CTE WHERE EmployeeId IN(SELECT DISTINCT EmployeeId FROM INSERTED) AND ParentTeamleaderId IS NOT NULL) TC
ON
Employee.EmployeeId = TC.EmployeeId
问题:
想象一下,一家公司有 1000000 名员工,执行此查询需要很长时间。如何优化它以便只考虑插入行的父级?
递归 CTE 很棒,但如您所见,性能可能会因层次结构较大而受到影响。我坚信 临时表没有什么可耻的。
以下将在 0.784 秒内生成 200K 点层次结构。
例子
Select EmployeeId
,TeamleaderId
,Lvl=1
,TopTeamleaderId = 0
,ParentTree=cast(EmployeeId as varchar(500))
,CompanyID
Into #TempBld
From Employee
Where TeamleaderId is null
Declare @Cnt int=1
While @Cnt<=30 --<< Set Your Max Level
Begin
Insert Into #TempBld
Select A.EmployeeId
,A.TeamleaderId
,B.Lvl+1
,IIF(B.Lvl=1,B.EmployeeId,B.TopTeamleaderId)
,concat(A.EmployeeId,'>',B.ParentTree)
,A.CompanyID
From Employee A
Join #TempBld B on (B.Lvl=@Cnt and A.TeamleaderId=B.EmployeeId)
Set @Cnt=@Cnt+1
End
--Select * from #TempBld Order by ParentTree
Returns
示例table结构:
EmployeeId TeamleaderId TopTeamleaderId LEVEL ParentTree CompanyId
1 0 0 0 NULL 1
2 1 1 1 2>1 1
3 2 1 2 3>2>1 1
TeamleaderId
是 foreignKey
在同一个 table
EmployeeId
的引用
目标:
每当使用 EmployeeId
、TeamleaderId
、CompanyId
在 table 中插入一行时,自动填充 TopTeamleaderId
、LEVEL
和 ParentTree
AFTER INSERT
触发
代码:
WITH CTE AS (
SELECT EmployeeId, TeamleaderId,0 AS [Level], CAST(EmployeeId AS varchar(100)) AS Heirarchy, TopTeamleaderId
FROM dbo.Employee
WHERE EmployeeId IN (SELECT EmployeeId FROM Employee WHERE TeamleaderId IS NULL
AND CompanyId IN(SELECT DISTINCT CompanyId FROM INSERTED))
UNION ALL
SELECT mgr.EmployeeId, mgr.TeamleaderId, CTE.[Level] +1 AS [Level],
CAST(( CAST(mgr.EmployeeId AS VARCHAR(100)) + '>' + CTE.Heirarchy) AS varchar(100)) AS Heirarchy, CTE.TopTeamleaderId
FROM CTE
INNER JOIN dbo.Employee AS mgr
ON TaskCTE.EmployeeId = mgr.ParentTeamleaderId
)
UPDATE Employee SET [LEVEL] = TC.[LEVEL], ParentTree = TC.Heirarchy, TopTeamleaderId = TC.TopTeamleaderId
FROM dbo.Employee AS Employee
JOIN (SELECT * FROM CTE WHERE EmployeeId IN(SELECT DISTINCT EmployeeId FROM INSERTED) AND ParentTeamleaderId IS NOT NULL) TC
ON
Employee.EmployeeId = TC.EmployeeId
问题: 想象一下,一家公司有 1000000 名员工,执行此查询需要很长时间。如何优化它以便只考虑插入行的父级?
递归 CTE 很棒,但如您所见,性能可能会因层次结构较大而受到影响。我坚信 临时表没有什么可耻的。
以下将在 0.784 秒内生成 200K 点层次结构。
例子
Select EmployeeId
,TeamleaderId
,Lvl=1
,TopTeamleaderId = 0
,ParentTree=cast(EmployeeId as varchar(500))
,CompanyID
Into #TempBld
From Employee
Where TeamleaderId is null
Declare @Cnt int=1
While @Cnt<=30 --<< Set Your Max Level
Begin
Insert Into #TempBld
Select A.EmployeeId
,A.TeamleaderId
,B.Lvl+1
,IIF(B.Lvl=1,B.EmployeeId,B.TopTeamleaderId)
,concat(A.EmployeeId,'>',B.ParentTree)
,A.CompanyID
From Employee A
Join #TempBld B on (B.Lvl=@Cnt and A.TeamleaderId=B.EmployeeId)
Set @Cnt=@Cnt+1
End
--Select * from #TempBld Order by ParentTree
Returns