SQL Server:为相关记录创建分组
SQL Server Create Grouping For Related Records
我遇到了一个有趣的场景,试图将任意 FamilyId
分配给彼此相关的字段。
这是我们目前正在使用的结构:
DataId OriginalDataId
3 1
4 1
5 1
6 1
3 2
4 2
5 2
6 2
7 10
8 10
9 10
11 15
我们试图做的是向所有彼此之间有关系的 DataId
添加一个 FamilyId
列。
在这种情况下,Id的3
、4
、5
和6
与1
有关系。但是3
、4
、5
、6
也和2
有关系。所以1
、2
、3
、4
、5
、6
都应该被认为是在同一个FamilyId
.
7
、8
、9
只和10
有关系,这就把它放到一个单独的FamilyId
里了。 11
和 15
同理。
我期望的结果如下:
DataId FamilyId
1 1
2 1
3 1
4 1
5 1
6 1
7 2
8 2
9 2
10 2
11 3
15 3
示例数据、结构和查询:
-- Staging table of (DataId, OriginalDataId) relationship pairs used as sample data.
DECLARE @Results_Stage TABLE
(
    DataId         BIGINT NOT NULL,
    OriginalDataId BIGINT NULL
);

INSERT INTO @Results_Stage (DataId, OriginalDataId)
VALUES
    (3, 1), (4, 1), (5, 1), (6, 1),
    (3, 2), (4, 2), (5, 2), (6, 2),
    (7, 10), (8, 10), (9, 10),
    (11, 15);

-- Attempted FamilyId: numbers each row within its DataId partition, ordered by
-- OriginalDataId, once keyed by DataId and once keyed by OriginalDataId.
-- UNION removes duplicate (id, FamilyId) pairs across the two halves.
SELECT
    Child.DataId,
    ROW_NUMBER() OVER (PARTITION BY Child.DataId ORDER BY Child.OriginalDataId ASC) AS FamilyId
FROM @Results_Stage AS Child
UNION
SELECT
    Parent.OriginalDataId,
    ROW_NUMBER() OVER (PARTITION BY Parent.DataId ORDER BY Parent.OriginalDataId ASC) AS FamilyId
FROM @Results_Stage AS Parent;
我很肯定我的尝试远非正确,但老实说我不确定该从哪里入手,也不确定这在 SQL Server 中是否可行。
有没有人知道如何解决这个问题,或者至少,有什么能为我指明正确的方向?
编辑 下面是我到目前为止提出的一个查询,用于识别应该属于相同 FamilyId
的其他 DataId
记录:
-- Id whose related records are being looked up.
DECLARE @DataId BIGINT = 1;

-- Children: every DataId that hangs off an OriginalDataId equal to either side
-- of a staging row that mentions @DataId.
WITH Children AS
(
    SELECT DISTINCT Related.DataId
    FROM @Results_Stage AS Seed
    OUTER APPLY
    (
        SELECT DISTINCT Candidate.DataId
        FROM @Results_Stage AS Candidate
        WHERE Candidate.OriginalDataId IN (Seed.DataId, Seed.OriginalDataId)
    ) AS Related
    WHERE @DataId IN (Seed.DataId, Seed.OriginalDataId)
)
-- The OriginalDataIds those children point at ...
SELECT DISTINCT Parent.OriginalDataId
FROM Children AS Kid
OUTER APPLY
(
    SELECT Stage.OriginalDataId
    FROM @Results_Stage AS Stage
    WHERE Stage.DataId = Kid.DataId
) AS Parent
UNION
-- ... plus the children themselves, de-duplicated by UNION.
SELECT Kid.DataId
FROM Children AS Kid;
以下查询,它使用了 FOR XML PATH
:
-- For every OriginalDataId, build GroupValues: a comma-separated list of the
-- DataIds that reference it. Reads from the temp table #Results_Stage.
SELECT
    R.OriginalDataId,
    STUFF((
        SELECT ', ' + CAST(S.DataId AS VARCHAR(MAX))
        FROM #Results_Stage AS S
        WHERE S.OriginalDataId = R.OriginalDataId
        -- Without an explicit order the concatenation order is not guaranteed,
        -- so two parents with the same children could yield different strings.
        ORDER BY S.DataId
        FOR XML PATH(''), TYPE).value('(./text())[1]', 'VARCHAR(MAX)')
    , 1, 2, '') AS GroupValues  -- STUFF removes the leading ', '
FROM #Results_Stage AS R
GROUP BY R.OriginalDataId
可用于生成此输出:
OriginalDataId GroupValues
===========================
1 3, 4, 5, 6
2 3, 4, 5, 6
10 7, 8, 9
15 11
使用上面的结果集,我们可以很容易地识别每个组,从而得到可以应用 DENSE_RANK()
的东西:
-- Assign a FamilyId to every id by ranking parents on their list of children.
-- NOTE: two OriginalDataIds land in the same family only when their child
-- lists are identical; partially overlapping lists get different FamilyIds.
;WITH GroupedData AS (
    -- One row per OriginalDataId with its children as a comma-separated string.
    SELECT
        R.OriginalDataId,
        STUFF((
            SELECT ', ' + CAST(S.DataId AS VARCHAR(MAX))
            FROM #Results_Stage AS S
            WHERE S.OriginalDataId = R.OriginalDataId
            -- Canonical order: DENSE_RANK below compares these strings, so the
            -- same set of children must always produce the same text.
            ORDER BY S.DataId
            FOR XML PATH(''), TYPE).value('(./text())[1]', 'VARCHAR(MAX)')
        , 1, 2, '') AS GroupValues  -- STUFF removes the leading ', '
    FROM #Results_Stage AS R
    GROUP BY R.OriginalDataId
), Families AS (
    -- Parents with equal GroupValues share a rank, which becomes the FamilyId.
    SELECT
        OriginalDataId,
        DENSE_RANK() OVER (ORDER BY GroupValues) AS FamilyId
    FROM GroupedData
)
-- Parents themselves ...
SELECT OriginalDataId AS DataId, FamilyId
FROM Families
UNION
-- ... plus their children; UNION collapses children reached through
-- several parents of the same family.
SELECT R.DataId, F.FamilyId
FROM #Results_Stage AS R
INNER JOIN Families AS F ON R.OriginalDataId = F.OriginalDataId
-- DataId as tie-breaker makes the row order deterministic within a family.
ORDER BY FamilyId, DataId
上面的输出是:
DataId FamilyId
===================
11 1
15 1
1 2
2 2
3 2
4 2
5 2
6 2
7 3
8 3
9 3
10 3
看看这个……它看起来不太优雅,但确实能解决问题 :)
-- Sample (DataId, OriginalDataId) pairs.
DECLARE @T TABLE (DataId INT, OriginalDataId INT)
INSERT INTO @T(DataId , OriginalDataId)
select 3,1
union all select 4,1
union all select 5,1
union all select 6,1
union all select 3,2
union all select 4,2
union all select 5,2
union all select 6,2
union all select 7,10
union all select 8,10
union all select 9,10
union all select 11,15

-- Echo the input rows.
SELECT DataId, OriginalDataId FROM @T

-- Derive a FamilyId per id. Each DataId is attached to the smallest
-- OriginalDataId it references; other OriginalDataIds sharing that DataId are
-- folded into the same representative, and families are numbered by it.
;WITH src AS (
    -- Rows with a NULL in either column describe no relationship and are
    -- ignored; both columns are declared without NOT NULL, and a NULL reaching
    -- m3 would otherwise poison the membership tests in m4.
    SELECT DataId, OriginalDataId
    FROM @T
    WHERE DataId IS NOT NULL
      AND OriginalDataId IS NOT NULL
)
, f AS (
    -- Every distinct OriginalDataId.
    SELECT DISTINCT OriginalDataId FROM src
)
, m AS (
    -- Representative (smallest) OriginalDataId for each DataId.
    SELECT
        DataId , OriginalDataId = MIN(OriginalDataId)
    FROM src
    GROUP BY DataId
)
, m2 AS (
    -- Pairs whose OriginalDataId is NOT the representative for that DataId.
    SELECT DISTINCT
        x.DataId , x.OriginalDataId
    FROM src AS x
    LEFT OUTER JOIN m ON x.DataId = m.DataId AND x.OriginalDataId = m.OriginalDataId
    WHERE m.DataId IS NULL
)
, m3 AS (
    -- Map each non-representative OriginalDataId to the representative that
    -- shares a DataId with it.
    SELECT DISTINCT DataId = x.OriginalDataId , m.OriginalDataId
    FROM m2 AS x
    INNER JOIN m ON x.DataId = m.DataId
)
, m4 AS (
    -- Representatives mapped to themselves.
    SELECT DISTINCT
        DataId = s.OriginalDataId , s.OriginalDataId
    FROM src AS s
    WHERE NOT EXISTS (SELECT 1 FROM m3 WHERE m3.DataId = s.OriginalDataId)
    UNION
    -- Each DataId mapped to its representative (unless already covered by m3).
    SELECT DISTINCT
        x.DataId , f.OriginalDataId
    FROM f
    INNER JOIN m AS x on x.OriginalDataId = f.OriginalDataId
    WHERE NOT EXISTS (SELECT 1 FROM m3 WHERE m3.DataId = x.DataId)
    UNION
    -- Folded OriginalDataIds mapped to their representative.
    SELECT DataId , OriginalDataId FROM m3
)
, list AS (
    -- Number the families by representative OriginalDataId.
    SELECT
        x.DataId, FamilyId = DENSE_RANK() OVER(ORDER BY x.OriginalDataId )
    FROM m4 AS x
)
SELECT DataId, FamilyId
FROM list
ORDER BY FamilyId, DataId
-- OUTPUT
DataId FamilyId
1 1
2 1
3 1
4 1
5 1
6 1
7 2
8 2
9 2
10 2
11 3
15 3
我遇到了一个有趣的场景,试图将任意 FamilyId
分配给彼此相关的字段。
这是我们目前正在使用的结构:
DataId OriginalDataId
3 1
4 1
5 1
6 1
3 2
4 2
5 2
6 2
7 10
8 10
9 10
11 15
我们试图做的是向所有彼此之间有关系的 DataId
添加一个 FamilyId
列。
在这种情况下,Id的3
、4
、5
和6
与1
有关系。但是3
、4
、5
、6
也和2
有关系。所以1
、2
、3
、4
、5
、6
都应该被认为是在同一个FamilyId
.
7
、8
、9
只和10
有关系,这就把它放到一个单独的FamilyId
里了。 11
和 15
同理。
我期望的结果如下:
DataId FamilyId
1 1
2 1
3 1
4 1
5 1
6 1
7 2
8 2
9 2
10 2
11 3
15 3
示例数据、结构和查询:
-- Staging table of (DataId, OriginalDataId) relationship pairs used as sample data.
DECLARE @Results_Stage TABLE
(
    DataId         BIGINT NOT NULL,
    OriginalDataId BIGINT NULL
);

INSERT INTO @Results_Stage (DataId, OriginalDataId)
VALUES
    (3, 1), (4, 1), (5, 1), (6, 1),
    (3, 2), (4, 2), (5, 2), (6, 2),
    (7, 10), (8, 10), (9, 10),
    (11, 15);

-- Attempted FamilyId: numbers each row within its DataId partition, ordered by
-- OriginalDataId, once keyed by DataId and once keyed by OriginalDataId.
-- UNION removes duplicate (id, FamilyId) pairs across the two halves.
SELECT
    Child.DataId,
    ROW_NUMBER() OVER (PARTITION BY Child.DataId ORDER BY Child.OriginalDataId ASC) AS FamilyId
FROM @Results_Stage AS Child
UNION
SELECT
    Parent.OriginalDataId,
    ROW_NUMBER() OVER (PARTITION BY Parent.DataId ORDER BY Parent.OriginalDataId ASC) AS FamilyId
FROM @Results_Stage AS Parent;
我很肯定我的尝试远非正确,但老实说我不确定该从哪里入手,也不确定这在 SQL Server 中是否可行。
有没有人知道如何解决这个问题,或者至少,有什么能为我指明正确的方向?
编辑 下面是我到目前为止提出的一个查询,用于识别应该属于相同 FamilyId
的其他 DataId
记录:
-- Id whose related records are being looked up.
DECLARE @DataId BIGINT = 1;

-- Children: every DataId that hangs off an OriginalDataId equal to either side
-- of a staging row that mentions @DataId.
WITH Children AS
(
    SELECT DISTINCT Related.DataId
    FROM @Results_Stage AS Seed
    OUTER APPLY
    (
        SELECT DISTINCT Candidate.DataId
        FROM @Results_Stage AS Candidate
        WHERE Candidate.OriginalDataId IN (Seed.DataId, Seed.OriginalDataId)
    ) AS Related
    WHERE @DataId IN (Seed.DataId, Seed.OriginalDataId)
)
-- The OriginalDataIds those children point at ...
SELECT DISTINCT Parent.OriginalDataId
FROM Children AS Kid
OUTER APPLY
(
    SELECT Stage.OriginalDataId
    FROM @Results_Stage AS Stage
    WHERE Stage.DataId = Kid.DataId
) AS Parent
UNION
-- ... plus the children themselves, de-duplicated by UNION.
SELECT Kid.DataId
FROM Children AS Kid;
以下查询,它使用了 FOR XML PATH
:
-- For every OriginalDataId, build GroupValues: a comma-separated list of the
-- DataIds that reference it. Reads from the temp table #Results_Stage.
SELECT
    R.OriginalDataId,
    STUFF((
        SELECT ', ' + CAST(S.DataId AS VARCHAR(MAX))
        FROM #Results_Stage AS S
        WHERE S.OriginalDataId = R.OriginalDataId
        -- Without an explicit order the concatenation order is not guaranteed,
        -- so two parents with the same children could yield different strings.
        ORDER BY S.DataId
        FOR XML PATH(''), TYPE).value('(./text())[1]', 'VARCHAR(MAX)')
    , 1, 2, '') AS GroupValues  -- STUFF removes the leading ', '
FROM #Results_Stage AS R
GROUP BY R.OriginalDataId
可用于生成此输出:
OriginalDataId GroupValues
===========================
1 3, 4, 5, 6
2 3, 4, 5, 6
10 7, 8, 9
15 11
使用上面的结果集,我们可以很容易地识别每个组,从而得到可以应用 DENSE_RANK()
的东西:
-- Assign a FamilyId to every id by ranking parents on their list of children.
-- NOTE: two OriginalDataIds land in the same family only when their child
-- lists are identical; partially overlapping lists get different FamilyIds.
;WITH GroupedData AS (
    -- One row per OriginalDataId with its children as a comma-separated string.
    SELECT
        R.OriginalDataId,
        STUFF((
            SELECT ', ' + CAST(S.DataId AS VARCHAR(MAX))
            FROM #Results_Stage AS S
            WHERE S.OriginalDataId = R.OriginalDataId
            -- Canonical order: DENSE_RANK below compares these strings, so the
            -- same set of children must always produce the same text.
            ORDER BY S.DataId
            FOR XML PATH(''), TYPE).value('(./text())[1]', 'VARCHAR(MAX)')
        , 1, 2, '') AS GroupValues  -- STUFF removes the leading ', '
    FROM #Results_Stage AS R
    GROUP BY R.OriginalDataId
), Families AS (
    -- Parents with equal GroupValues share a rank, which becomes the FamilyId.
    SELECT
        OriginalDataId,
        DENSE_RANK() OVER (ORDER BY GroupValues) AS FamilyId
    FROM GroupedData
)
-- Parents themselves ...
SELECT OriginalDataId AS DataId, FamilyId
FROM Families
UNION
-- ... plus their children; UNION collapses children reached through
-- several parents of the same family.
SELECT R.DataId, F.FamilyId
FROM #Results_Stage AS R
INNER JOIN Families AS F ON R.OriginalDataId = F.OriginalDataId
-- DataId as tie-breaker makes the row order deterministic within a family.
ORDER BY FamilyId, DataId
上面的输出是:
DataId FamilyId
===================
11 1
15 1
1 2
2 2
3 2
4 2
5 2
6 2
7 3
8 3
9 3
10 3
看看这个……它看起来不太优雅,但确实能解决问题 :)
-- Sample (DataId, OriginalDataId) pairs.
DECLARE @T TABLE (DataId INT, OriginalDataId INT)
INSERT INTO @T(DataId , OriginalDataId)
select 3,1
union all select 4,1
union all select 5,1
union all select 6,1
union all select 3,2
union all select 4,2
union all select 5,2
union all select 6,2
union all select 7,10
union all select 8,10
union all select 9,10
union all select 11,15

-- Echo the input rows.
SELECT DataId, OriginalDataId FROM @T

-- Derive a FamilyId per id. Each DataId is attached to the smallest
-- OriginalDataId it references; other OriginalDataIds sharing that DataId are
-- folded into the same representative, and families are numbered by it.
;WITH src AS (
    -- Rows with a NULL in either column describe no relationship and are
    -- ignored; both columns are declared without NOT NULL, and a NULL reaching
    -- m3 would otherwise poison the membership tests in m4.
    SELECT DataId, OriginalDataId
    FROM @T
    WHERE DataId IS NOT NULL
      AND OriginalDataId IS NOT NULL
)
, f AS (
    -- Every distinct OriginalDataId.
    SELECT DISTINCT OriginalDataId FROM src
)
, m AS (
    -- Representative (smallest) OriginalDataId for each DataId.
    SELECT
        DataId , OriginalDataId = MIN(OriginalDataId)
    FROM src
    GROUP BY DataId
)
, m2 AS (
    -- Pairs whose OriginalDataId is NOT the representative for that DataId.
    SELECT DISTINCT
        x.DataId , x.OriginalDataId
    FROM src AS x
    LEFT OUTER JOIN m ON x.DataId = m.DataId AND x.OriginalDataId = m.OriginalDataId
    WHERE m.DataId IS NULL
)
, m3 AS (
    -- Map each non-representative OriginalDataId to the representative that
    -- shares a DataId with it.
    SELECT DISTINCT DataId = x.OriginalDataId , m.OriginalDataId
    FROM m2 AS x
    INNER JOIN m ON x.DataId = m.DataId
)
, m4 AS (
    -- Representatives mapped to themselves.
    SELECT DISTINCT
        DataId = s.OriginalDataId , s.OriginalDataId
    FROM src AS s
    WHERE NOT EXISTS (SELECT 1 FROM m3 WHERE m3.DataId = s.OriginalDataId)
    UNION
    -- Each DataId mapped to its representative (unless already covered by m3).
    SELECT DISTINCT
        x.DataId , f.OriginalDataId
    FROM f
    INNER JOIN m AS x on x.OriginalDataId = f.OriginalDataId
    WHERE NOT EXISTS (SELECT 1 FROM m3 WHERE m3.DataId = x.DataId)
    UNION
    -- Folded OriginalDataIds mapped to their representative.
    SELECT DataId , OriginalDataId FROM m3
)
, list AS (
    -- Number the families by representative OriginalDataId.
    SELECT
        x.DataId, FamilyId = DENSE_RANK() OVER(ORDER BY x.OriginalDataId )
    FROM m4 AS x
)
SELECT DataId, FamilyId
FROM list
ORDER BY FamilyId, DataId
-- OUTPUT
DataId FamilyId
1 1
2 1
3 1
4 1
5 1
6 1
7 2
8 2
9 2
10 2
11 3
15 3