SQL 服务器 CTE 查找从一个 ID 到另一个 ID 的路径
SQL Server CTE to find path from one ID to another ID
我在 SQL Server 2008 R2 数据库中有一个 table,它定义了不同状态之间的转换。
示例相关 table 列:
TransitionID | SourceStateID | DestinationStateID | WorkflowID
--------------------------------------------------------------
1 | 17 | 18 | 3
2 | 17 | 21 | 3
3 | 17 | 24 | 3
4 | 17 | 172 | 3
5 | 18 | 17 | 3
6 | 18 | 24 | 3
7 | 18 | 31 | 3
8 | 21 | 17 | 3
9 | 21 | 20 | 3
10 | 21 | 141 | 3
11 | 21 | 184 | 3
...等等
我的目标是能够定义 starting StateID、ending StateID 和 WorkflowID,并希望找到一个合乎逻辑的该工作流中从起始 StateID 到结束 StateID 的路径。
例如对于上面的 table,如果我提供的起始 StateID 为 17,结束 StateID 为 184,WorkflowID 为 3,我可以得到 17>21>184(或更多)的 'path'理想情况下,TransitionID 2 > TransitionID 11)
好的:
- 定义的开始和结束 StateID 将总是在定义的 WorkflowID
中有一个可能路径
坏处:
几乎每个 source/destination StateID 都有循环引用(即可能存在从 SourceStateID 1 到 DestinationStateID 2 的转换,也可能从 SourceStateID 2 到 DestinationStateID 1
几乎任何定义的起始 StateID 和结束 StateID 肯定有多个可能的路径
我意识到这是我追求的 CTE 的某种方式,但不可否认,我发现 CTE 通常很难掌握,而且当循环引用是一个有保证的问题时更是如此。
完美的解决方案是选择从起始 StateID 到结束 StateID 的最短路径,但老实说,如果我能得到可靠地工作的东西,可以给我 any 两个州之间的有效路径我会很高兴。
有哪位 SQL 专家能给我指出一些方向吗?老实说,我什至不知道从哪里开始防止循环问题,比如沿着 17>18>17>18>17>18...
令人作呕的更新
我想出了这个查询,它在情感层面上伤害了我(在这个 post: 的帮助下),但似乎有效。
DECLARE @sourceStatusId INT = 17
DECLARE @destinationStatusId INT = 24
DECLARE @workflowId INT = 3
DECLARE @sourceStatusIdString NVARCHAR(MAX) = CAST(@sourceStatusId AS NVARCHAR(MAX))
DECLARE @destinationStatusIdString NVARCHAR(MAX) = CAST(@destinationStatusId AS NVARCHAR(MAX))
DECLARE @workflowIdString NVARCHAR(MAX) = CAST(@workflowId AS NVARCHAR(MAX))
;WITH CTE ([Source], [Destination], [Sentinel]) AS
(
SELECT
CAST(t.[Source] AS NVARCHAR(MAX)) AS [Source],
CAST(t.[Destination] AS NVARCHAR(MAX)) AS [Destination],
[Sentinel] = CAST([Source] AS NVARCHAR(MAX))
FROM
Transitions t
WHERE
CAST([Source] AS NVARCHAR(MAX)) = @sourceStatusIdString AND
CAST([WorkflowID] AS NVARCHAR(MAX)) = @workflowIdString
UNION ALL
SELECT
CAST(CTE.[Destination] AS NVARCHAR(MAX)),
CAST(t.[Destination] AS NVARCHAR(MAX)),
CAST([Sentinel] AS NVARCHAR(MAX)) + CAST('|' AS NVARCHAR(MAX)) + CAST(CTE.[Destination] AS NVARCHAR(MAX))
FROM
CTE
JOIN Transitions t
ON CAST(t.[Source] AS NVARCHAR(MAX)) = CAST(CTE.[Destination] AS NVARCHAR(MAX))
WHERE
CHARINDEX(CTE.[Destination], Sentinel)=0
)
SELECT TOP 1
[Sentinel]
FROM
CTE
WHERE
LEFT([Sentinel], LEN(@sourceStatusIdString)) = @sourceStatusIdString AND
RIGHT([Sentinel], LEN(@destinationStatusIdString)) = @destinationStatusIdString
ORDER BY
LEN([Sentinel])
WITH q (id, src, dst, sid, cnt, path) AS
(
SELECT transitionId, sourceStateId, destinationStateId, sourceStateId, 1,
CAST(transitionId AS VARCHAR(MAX)) path
FROM mytable
WHERE sourceStateId = 18
UNION ALL
SELECT m.transitionId, m.sourceStateId, m.destinationStateId,
CASE WHEN sid < sourceStateId THEN sid ELSE sourceStateId END, cnt + 1,
path + ', ' + CAST(transitionId AS VARCHAR(MAX))
FROM q
JOIN mytable m
ON m.sourceStateId = q.dst
AND m.sourceStateId <> q.sid
)
SELECT TOP 1
*
FROM q
WHERE dst = 184
ORDER BY
cnt DESC
类似于 Quassnoi 的回答,但防止在第一个元素之后开始的循环引用:
DECLARE @src int = 18, @dst int = 184;
WITH cte AS
(Select TransitionId, SourceStateId, DestinationStateID, SourceStateID as StartingState, 0 as Depth
, cast(TransitionId as varchar(max)) + ',' as IDPath
, cast(SourceStateID as varchar(max)) + ',' as States
FROM Transitions
WHERE SourceStateId = @src
UNION ALL
Select t.TransitionId, t.SourceStateId, t.DestinationStateID, StartingState, cte.Depth + 1
, cte.IDPath + cast(t.TransitionId as varchar(max)) + ','
, cte.States + cast(t.SourceStateID as varchar(max)) + ','
FROM cte JOIN Transitions t on cte.DestinationStateID = t.SourceStateId
and CHARINDEX(',' + cast(t.SourceStateID as varchar(max)) + ',', States) = 0 --prevent loop starting after first element
and t.DestinationStateID <> StartingState --prevent loop starting with first element
where cte.Depth < 10 -- prevent going too deep
)
select TransitionId, SourceStateId, DestinationStateID, Depth, left(IDPath, len(IDPath) - 1) IDPath, left(States, len(States) - 1) States
from cte
where DestinationStateID = @dst
order by Depth
这是我的看法...
基于 Quassnoi 的构建,但比 Kateract 更短 ;-)
http://www.sqlfiddle.com/#!6/322d3/4
WITH data AS (
SELECT TransitionId, SourceStateId, DestinationStateId,
'|' + CAST(transitionId AS VARCHAR(MAX)) as Path
FROM States
WHERE sourceStateId = 17
AND WorkflowID = 3
UNION ALL
SELECT m.TransitionId, m.sourceStateId, m.DestinationStateId,
Path + '|' + CAST(m.TransitionId AS VARCHAR(MAX))
FROM data d
JOIN States m ON
m.sourceStateId = d.DestinationStateId
AND CHARINDEX( '|' + convert(varchar(10),m.TransitionID) + '|', path) = 0
AND WorkflowID = 3
)
SELECT TOP 1 *
FROM data
WHERE DestinationStateId = 184
ORDER BY LEN(Path)
-- If you want the longest path... ORDER BY LEN(Path) DESC
我在 SQL Server 2008 R2 数据库中有一个 table,它定义了不同状态之间的转换。
示例相关 table 列:
TransitionID | SourceStateID | DestinationStateID | WorkflowID
--------------------------------------------------------------
1 | 17 | 18 | 3
2 | 17 | 21 | 3
3 | 17 | 24 | 3
4 | 17 | 172 | 3
5 | 18 | 17 | 3
6 | 18 | 24 | 3
7 | 18 | 31 | 3
8 | 21 | 17 | 3
9 | 21 | 20 | 3
10 | 21 | 141 | 3
11 | 21 | 184 | 3
...等等
我的目标是能够定义 starting StateID、ending StateID 和 WorkflowID,并希望找到一个合乎逻辑的该工作流中从起始 StateID 到结束 StateID 的路径。
例如对于上面的 table,如果我提供的起始 StateID 为 17,结束 StateID 为 184,WorkflowID 为 3,我可以得到 17>21>184(或更多)的 'path'理想情况下,TransitionID 2 > TransitionID 11)
好的:
- 定义的开始和结束 StateID 将总是在定义的 WorkflowID 中有一个可能路径
坏处:
几乎每个 source/destination StateID 都有循环引用(即可能存在从 SourceStateID 1 到 DestinationStateID 2 的转换,也可能从 SourceStateID 2 到 DestinationStateID 1
几乎任何定义的起始 StateID 和结束 StateID 肯定有多个可能的路径
我意识到这是我追求的 CTE 的某种方式,但不可否认,我发现 CTE 通常很难掌握,而且当循环引用是一个有保证的问题时更是如此。
完美的解决方案是选择从起始 StateID 到结束 StateID 的最短路径,但老实说,如果我能得到可靠地工作的东西,可以给我 any 两个州之间的有效路径我会很高兴。
有哪位 SQL 专家能给我指出一些方向吗?老实说,我什至不知道从哪里开始防止循环问题,比如沿着 17>18>17>18>17>18...
令人作呕的更新 我想出了这个查询,它在情感层面上伤害了我(在这个 post: 的帮助下),但似乎有效。
DECLARE @sourceStatusId INT = 17
DECLARE @destinationStatusId INT = 24
DECLARE @workflowId INT = 3
DECLARE @sourceStatusIdString NVARCHAR(MAX) = CAST(@sourceStatusId AS NVARCHAR(MAX))
DECLARE @destinationStatusIdString NVARCHAR(MAX) = CAST(@destinationStatusId AS NVARCHAR(MAX))
DECLARE @workflowIdString NVARCHAR(MAX) = CAST(@workflowId AS NVARCHAR(MAX))
;WITH CTE ([Source], [Destination], [Sentinel]) AS
(
SELECT
CAST(t.[Source] AS NVARCHAR(MAX)) AS [Source],
CAST(t.[Destination] AS NVARCHAR(MAX)) AS [Destination],
[Sentinel] = CAST([Source] AS NVARCHAR(MAX))
FROM
Transitions t
WHERE
CAST([Source] AS NVARCHAR(MAX)) = @sourceStatusIdString AND
CAST([WorkflowID] AS NVARCHAR(MAX)) = @workflowIdString
UNION ALL
SELECT
CAST(CTE.[Destination] AS NVARCHAR(MAX)),
CAST(t.[Destination] AS NVARCHAR(MAX)),
CAST([Sentinel] AS NVARCHAR(MAX)) + CAST('|' AS NVARCHAR(MAX)) + CAST(CTE.[Destination] AS NVARCHAR(MAX))
FROM
CTE
JOIN Transitions t
ON CAST(t.[Source] AS NVARCHAR(MAX)) = CAST(CTE.[Destination] AS NVARCHAR(MAX))
WHERE
CHARINDEX(CTE.[Destination], Sentinel)=0
)
SELECT TOP 1
[Sentinel]
FROM
CTE
WHERE
LEFT([Sentinel], LEN(@sourceStatusIdString)) = @sourceStatusIdString AND
RIGHT([Sentinel], LEN(@destinationStatusIdString)) = @destinationStatusIdString
ORDER BY
LEN([Sentinel])
WITH q (id, src, dst, sid, cnt, path) AS
(
SELECT transitionId, sourceStateId, destinationStateId, sourceStateId, 1,
CAST(transitionId AS VARCHAR(MAX)) path
FROM mytable
WHERE sourceStateId = 18
UNION ALL
SELECT m.transitionId, m.sourceStateId, m.destinationStateId,
CASE WHEN sid < sourceStateId THEN sid ELSE sourceStateId END, cnt + 1,
path + ', ' + CAST(transitionId AS VARCHAR(MAX))
FROM q
JOIN mytable m
ON m.sourceStateId = q.dst
AND m.sourceStateId <> q.sid
)
SELECT TOP 1
*
FROM q
WHERE dst = 184
ORDER BY
cnt DESC
类似于 Quassnoi 的回答,但防止在第一个元素之后开始的循环引用:
DECLARE @src int = 18, @dst int = 184;
WITH cte AS
(Select TransitionId, SourceStateId, DestinationStateID, SourceStateID as StartingState, 0 as Depth
, cast(TransitionId as varchar(max)) + ',' as IDPath
, cast(SourceStateID as varchar(max)) + ',' as States
FROM Transitions
WHERE SourceStateId = @src
UNION ALL
Select t.TransitionId, t.SourceStateId, t.DestinationStateID, StartingState, cte.Depth + 1
, cte.IDPath + cast(t.TransitionId as varchar(max)) + ','
, cte.States + cast(t.SourceStateID as varchar(max)) + ','
FROM cte JOIN Transitions t on cte.DestinationStateID = t.SourceStateId
and CHARINDEX(',' + cast(t.SourceStateID as varchar(max)) + ',', States) = 0 --prevent loop starting after first element
and t.DestinationStateID <> StartingState --prevent loop starting with first element
where cte.Depth < 10 -- prevent going too deep
)
select TransitionId, SourceStateId, DestinationStateID, Depth, left(IDPath, len(IDPath) - 1) IDPath, left(States, len(States) - 1) States
from cte
where DestinationStateID = @dst
order by Depth
这是我的看法... 基于 Quassnoi 的构建,但比 Kateract 更短 ;-)
http://www.sqlfiddle.com/#!6/322d3/4
WITH data AS (
SELECT TransitionId, SourceStateId, DestinationStateId,
'|' + CAST(transitionId AS VARCHAR(MAX)) as Path
FROM States
WHERE sourceStateId = 17
AND WorkflowID = 3
UNION ALL
SELECT m.TransitionId, m.sourceStateId, m.DestinationStateId,
Path + '|' + CAST(m.TransitionId AS VARCHAR(MAX))
FROM data d
JOIN States m ON
m.sourceStateId = d.DestinationStateId
AND CHARINDEX( '|' + convert(varchar(10),m.TransitionID) + '|', path) = 0
AND WorkflowID = 3
)
SELECT TOP 1 *
FROM data
WHERE DestinationStateId = 184
ORDER BY LEN(Path)
-- If you want the longest path... ORDER BY LEN(Path) DESC