根据日期重叠对行进行分组

Group up rows based on date overlapping

在同一个 id 内,如果若干行的日期区间(EffectiveDate 到 Enddate)相互重叠,就需要把这些行归入同一个分组,并为该分组分配一个唯一的编号(DateGroup)。

下图中的日期组是所需的输出列

数据按 ID asc、EffectiveDate ASC、EndDate Desc 排序

-- Sample data for the date-overlap grouping problem: one row per
-- (id, EffectiveDate, Enddate) range.
-- Drop any leftover copy first so the script can be re-run in the same session.
IF OBJECT_ID('tempdb..#DataTable') IS NOT NULL
    DROP TABLE #DataTable;

CREATE TABLE #DataTable (id int, EffectiveDate datetime, Enddate datetime);

-- ISO 8601 literals ('YYYY-MM-DDThh:mm:ss.mmm') are interpreted the same way under
-- every SET LANGUAGE / SET DATEFORMAT setting. The space-separated form
-- 'YYYY-MM-DD hh:mm:ss.mmm' is not language-neutral for the datetime type.
INSERT INTO #DataTable (id, EffectiveDate, Enddate)
VALUES
    (1, '2017-01-01T00:00:00.000', '2017-01-11T00:00:00.000'),
    (1, '2017-01-02T00:00:00.000', '2017-01-05T00:00:00.000'),
    (1, '2017-01-03T00:00:00.000', '2017-01-12T00:00:00.000'),
    (1, '2017-01-06T00:00:00.000', '2017-01-09T00:00:00.000'),
    (1, '2017-01-13T00:00:00.000', '2017-01-19T00:00:00.000'),
    (2, '2017-02-01T00:00:00.000', '2017-02-11T00:00:00.000'),
    (2, '2017-02-06T00:00:00.000', '2017-02-16T00:00:00.000');
GO

这个怎么样(我还在测试中)

-- Assigns a DATEGROUP number to every row of #DataTable by walking the rows one at a
-- time (recursive CTE) in id / EffectiveDate / Enddate DESC order.
-- A row stays in the current group when it has the same id as the previous row and
-- starts on or before the latest end date seen so far in that group; otherwise it
-- opens the next group.
-- NOTE(review): assumes Enddate is never NULL and Enddate >= EffectiveDate; a NULL
-- end date makes the comparison UNKNOWN and therefore starts a new group.
WITH Ordered AS
(
    -- Number the rows once so both the anchor and the recursive step share the same
    -- sequence instead of repeating the window query.
    SELECT id, EffectiveDate, Enddate
        , ROW_NUMBER() OVER (ORDER BY id, EffectiveDate, Enddate DESC) AS RN
    FROM #DataTable
)
, Z AS
(
    -- Anchor: the very first row opens group 1.
    SELECT id, EffectiveDate, Enddate, RN
        , Enddate AS GroupEnd
        , 1 AS DATEGROUP
    FROM Ordered
    WHERE RN = 1
    UNION ALL
    -- Recursive step: compare row RN with the state carried from row RN - 1.
    SELECT A.id, A.EffectiveDate, A.Enddate, A.RN
        -- GroupEnd carries the running maximum end date of the current group, so a
        -- short range nested inside a long one cannot "shrink" the group.
        , CASE WHEN A.id = Z.id AND A.EffectiveDate <= Z.GroupEnd AND Z.GroupEnd > A.Enddate
               THEN Z.GroupEnd
               ELSE A.Enddate END AS GroupEnd
        , CASE WHEN A.id = Z.id AND A.EffectiveDate <= Z.GroupEnd
               THEN Z.DATEGROUP
               ELSE Z.DATEGROUP + 1 END AS DATEGROUP
    FROM Ordered A
    INNER JOIN Z ON A.RN = Z.RN + 1
)
SELECT id AS ID, EffectiveDate, Enddate, DATEGROUP
FROM Z
ORDER BY RN
-- One recursion level per row: lift the default limit of 100 levels.
OPTION (MAXRECURSION 0);

输出:

ID          EffectiveDate           Enddate                 DATEGROUP
----------- ----------------------- ----------------------- -----------
1           2017-01-01 00:00:00.000 2017-01-11 00:00:00.000 1
1           2017-01-02 00:00:00.000 2017-01-05 00:00:00.000 1
1           2017-01-03 00:00:00.000 2017-01-12 00:00:00.000 1
1           2017-01-06 00:00:00.000 2017-01-09 00:00:00.000 1
1           2017-01-13 00:00:00.000 2017-01-19 00:00:00.000 2
2           2017-02-01 00:00:00.000 2017-02-11 00:00:00.000 3
2           2017-02-06 00:00:00.000 2017-02-16 00:00:00.000 3

此答案的思路是:先找出那些应当使累计的 DateGroup 计数器加 1 的记录,给这些记录赋值 1(其余记录赋值 0);有了这一标记之后,只需对它求累计和,即可得到 DateGroup。

-- this CTE returns the original table with a new column, amount, which contains a
-- value of 1 when the record should cause the DateGroup to be incremented by 1:
-- i.e. it is the first record of its ID, or it starts after every earlier record
-- of the same ID has already ended (running maximum of EndDate over the preceding
-- rows). Comparing against the running maximum, rather than only the first record
-- of the ID, keeps later records that overlap each other in the same group.
-- NOTE(review): assumes EndDate is not NULL; MAX() ignores NULL end dates.
WITH flagged AS (
    SELECT ID, EffectiveDate, EndDate,
           CASE
               WHEN EffectiveDate <= MAX(EndDate) OVER (
                        PARTITION BY ID
                        ORDER BY EffectiveDate, EndDate DESC
                        ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING)
               THEN 0
               ELSE 1   -- also taken for the first row of an ID (empty frame -> NULL)
           END AS amount
    FROM yourTable
)
-- finally, take a cumulative sum of the 'amount' column to generate the DateGroup.
-- 'amount DESC' is a tie-breaker so that, among exact duplicate rows, the one that
-- opened the group is accumulated first.
SELECT ID,
       EffectiveDate,
       EndDate,
       SUM(amount) OVER (
           ORDER BY ID, EffectiveDate, EndDate DESC, amount DESC
           ROWS UNBOUNDED PRECEDING) AS DateGroup
FROM flagged
ORDER BY ID, EffectiveDate, EndDate DESC;

输出:

此处演示:

Rextester

使用的数据:

-- Sample data used by the query above.
CREATE TABLE yourTable (ID int, EffectiveDate datetime, EndDate datetime);

-- Explicit column list keeps the INSERT valid if the table definition changes.
-- ISO 8601 literals ('YYYY-MM-DDThh:mm:ss.mmm') avoid language/DATEFORMAT-dependent
-- parsing of datetime strings.
INSERT INTO yourTable (ID, EffectiveDate, EndDate)
VALUES
    (1, '2017-01-01T00:00:00.000', '2017-01-11T00:00:00.000'),
    (1, '2017-01-02T00:00:00.000', '2017-01-05T00:00:00.000'),
    (1, '2017-01-03T00:00:00.000', '2017-01-12T00:00:00.000'),
    (1, '2017-01-06T00:00:00.000', '2017-01-09T00:00:00.000'),
    (1, '2017-01-13T00:00:00.000', '2017-01-19T00:00:00.000'),
    (2, '2017-02-01T00:00:00.000', '2017-02-11T00:00:00.000'),
    (2, '2017-02-06T00:00:00.000', '2017-02-16T00:00:00.000');

试试看,希望对您有所帮助。不是最吸引人的代码,但它应该可以工作。等有时间再整理一下吧

-- Groups overlapping date ranges per id using a self-join against earlier rows.
-- Reads the local temp table #DataTable (not a global ##DataTable).
-- NOTE(review): assumes Enddate is not NULL; a NULL end date never counts as
-- overlapping a later row.
;WITH cte_StepOne AS
(
-- Step one: give every row a sequence number in id / EffectiveDate / Enddate order.
SELECT ROW_NUMBER() OVER (ORDER BY [id],
    [EffectiveDate],
    [Enddate]) AS SeqNo,
    [id],
    [EffectiveDate],
    [Enddate]
FROM #DataTable
)
,cte_StepTwo AS
(
-- Step two: a row opens a new group unless some earlier row of the SAME id is
-- still running when it starts (earlier row's Enddate >= this row's EffectiveDate).
SELECT a.SeqNo,
    a.id,
    a.EffectiveDate,
    a.Enddate,
    CASE
        WHEN EXISTS (SELECT 1
                     FROM cte_StepOne b
                     WHERE b.id = a.id
                         AND b.SeqNo < a.SeqNo
                         AND b.Enddate >= a.EffectiveDate) THEN 0
        ELSE 1
    END AS IsGroupStart
FROM cte_StepOne a
)
,cte_Result AS
(
-- Result: the running count of group starts is the group number.
SELECT id,
    EffectiveDate,
    Enddate,
    SUM(IsGroupStart) OVER (ORDER BY SeqNo ROWS UNBOUNDED PRECEDING) AS DateGroup
FROM cte_StepTwo
)
-- DISTINCT is kept from the original query: exact duplicate input rows collapse
-- into a single output row.
SELECT DISTINCT id,
    EffectiveDate,
    Enddate,
    DateGroup
FROM cte_Result
ORDER BY id,EffectiveDate

结果:

这个呢?比其他解决方案更简单:

-- Groups overlapping date ranges per id and numbers the groups with DENSE_RANK.
-- NOTE(review): assumes Enddate is not NULL; MAX() ignores NULL end dates.
WITH 
CTE_FlagGroupStarts AS
(
    -- A row starts a new group when it begins after every earlier row of the same
    -- id has ended. The first row of an id has an empty frame (MAX = NULL), so the
    -- comparison is UNKNOWN and it falls through to ELSE 1.
    SELECT 
        id,
        EffectiveDate,
        Enddate,
        StartsNewGroup = 
            CASE WHEN EffectiveDate <= MAX(Enddate) OVER (
                          PARTITION BY id 
                          ORDER BY EffectiveDate, Enddate 
                          ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING)
                THEN 0
                ELSE 1
            END
    FROM 
        #DataTable
),

CTE_GetGroupStart AS
(
    -- Stamp every row with the EffectiveDate of the row that opened its group.
    -- Rows are visited in ascending EffectiveDate, so the running MAX over flagged
    -- rows is the most recent group start. 'StartsNewGroup DESC' makes the flagged
    -- row come first among exact duplicates.
    SELECT 
        id,
        EffectiveDate,
        Enddate,
        GroupStart = MAX(CASE WHEN StartsNewGroup = 1 THEN EffectiveDate END) OVER (
                         PARTITION BY id 
                         ORDER BY EffectiveDate, Enddate, StartsNewGroup DESC 
                         ROWS UNBOUNDED PRECEDING)
    FROM 
        CTE_FlagGroupStarts
)

-- Each distinct (id, GroupStart) pair is one group; DENSE_RANK numbers them 1..n.
SELECT
    id,
    Effectivedate,
    Enddate,
    DateGroup = DENSE_RANK() OVER (ORDER BY id, GroupStart)
FROM
    CTE_GetGroupStart
ORDER BY
    id, Effectivedate, Enddate

输出:

    id          Effectivedate           Enddate                 DateGroup
----------- ----------------------- ----------------------- --------------------
1           2017-01-01 00:00:00.000 2017-01-11 00:00:00.000 1
1           2017-01-02 00:00:00.000 2017-01-05 00:00:00.000 1
1           2017-01-03 00:00:00.000 2017-01-12 00:00:00.000 1
1           2017-01-06 00:00:00.000 2017-01-09 00:00:00.000 1
1           2017-01-13 00:00:00.000 2017-01-19 00:00:00.000 2
2           2017-02-01 00:00:00.000 2017-02-11 00:00:00.000 3
2           2017-02-06 00:00:00.000 2017-02-16 00:00:00.000 3

这可能对您有所帮助。我在这里发布了最短和最简单的 tsql 版本...

-- Groups overlapping date ranges per id.
-- PreviousMaxEndDate is the latest Enddate among all earlier rows of the same id
-- (NULL for the first row of an id). Looking only at the single previous row is not
-- enough, because a short range can sit inside an earlier, longer one.
-- NOTE(review): assumes Enddate is not NULL; MAX() ignores NULL end dates.
WITH CTE AS (
SELECT id,
       EffectiveDate,
       Enddate,
       MAX(Enddate) OVER (PARTITION BY id
                          ORDER BY EffectiveDate, Enddate
                          ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS PreviousMaxEndDate
FROM #DataTable)

-- A row opens a new group (flag 1) unless it starts on or before PreviousMaxEndDate;
-- the running total of the flags is the group number. The flag is repeated as the
-- last ORDER BY key so that, among exact duplicate rows, the one that opened the
-- group is accumulated first.
SELECT id,
       EffectiveDate,
       Enddate, 
       SUM(CASE WHEN EffectiveDate <= PreviousMaxEndDate THEN 0 ELSE 1 END)
           OVER (ORDER BY id,
                          EffectiveDate,
                          Enddate,
                          CASE WHEN EffectiveDate <= PreviousMaxEndDate THEN 0 ELSE 1 END DESC
                 ROWS UNBOUNDED PRECEDING) AS DateGroup
FROM CTE
ORDER BY id, EffectiveDate, Enddate

结果:

我猜你的样本数据中遗漏了一些测试场景。

-- Walks the rows of #DataTable one by one (recursive CTE) and assigns New_ID, the
-- group number: a row keeps the previous row's New_ID when it has the same id and
-- starts on or before the latest end date seen so far in that group.
-- NOTE(review): assumes enddate is not NULL and enddate >= effectivedate.
;with CTE as
(
-- enddate desc makes the row order (and therefore rn) deterministic for rows that
-- share the same id and effectivedate.
select id, EffectiveDate, Enddate
    ,ROW_NUMBER() over (order by id, EffectiveDate, Enddate desc) rn
from #DataTable
)
,CTE1 AS
(
-- anchor: the first row opens group 1
select id, EffectiveDate, Enddate, rn
    ,Enddate as group_end
    ,1 as New_ID
from cte
where rn=1

union ALL

-- recursive step: row rn is compared with the state carried from row rn-1;
-- group_end is the running maximum end date of the current group
select c.id, c.EffectiveDate, c.Enddate, c.rn
    ,case when c.id=c1.id and c.EffectiveDate<=c1.group_end and c1.group_end>c.Enddate
          then c1.group_end
          else c.Enddate
     END
    ,case when c.id=c1.id and c.EffectiveDate<=c1.group_end
          then c1.New_ID
          else c1.New_ID+1
     END
from cte c
inner join cte1 c1
on c.rn=c1.rn+1
)

select id, EffectiveDate, Enddate, rn, New_ID
from cte1
order by rn
-- one recursion level per row: no hard-coded row count, and the default limit of
-- 100 levels is lifted
option (maxrecursion 0);

drop table #DataTable;

从另一个论坛得到这个;根据我的要求进行了更改。看起来简单有效。

-- Groups overlapping date ranges per ID with two window passes:
--   1. isstart = 1 when a row begins after every earlier row of the same ID ended;
--   2. the running total of isstart is the group number DG.
WITH Src AS (
-- A NULL EndDate is treated as open-ended (9999-12-31). 'YYYYMMDD' is used because
-- that literal format does not depend on SET LANGUAGE / SET DATEFORMAT.
SELECT ID, EffectiveDate, EndDate,
   ISNULL(EndDate,'99991231') AS EffectiveEnd
FROM #DataTable
),
C1 AS (
SELECT ID, EffectiveDate, EndDate, EffectiveEnd,
   -- The first row of an ID has an empty frame (MAX = NULL) and falls to ELSE 1.
   CASE WHEN EffectiveDate <= MAX(EffectiveEnd) OVER(PARTITION BY ID ORDER BY EffectiveDate, EffectiveEnd DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) THEN 0 ELSE 1 END AS isstart
FROM Src
) 
-- The running sum must follow the same row order as the isstart calculation;
-- ordering by ID alone leaves the order within an ID undefined. 'isstart DESC'
-- breaks ties between exact duplicate rows.
SELECT ID,EffectiveDate,EndDate,
   SUM(isstart) OVER(ORDER BY ID, EffectiveDate, EffectiveEnd DESC, isstart DESC ROWS UNBOUNDED PRECEDING) AS DG 
FROM C1
ORDER BY ID, EffectiveDate, EffectiveEnd DESC