按特殊条件过滤日期范围

Filtering date ranges by special condition

我有一张表,包含以下数据(按开始日期排序):

    | Id | StartDate           | EndDate             |
    |----|---------------------|---------------------|
    | 1  | 2017-03-18 00:00:00 | 2017-03-18 02:00:00 |
    | 2  | 2017-03-18 01:30:00 | 2017-03-18 04:00:00 |
    | 3  | 2017-03-18 01:45:00 | 2017-03-18 03:00:00 |
    | 4  | 2017-03-18 02:10:00 | 2017-03-18 02:30:00 |
    | 5  | 2017-03-18 02:35:00 | 2017-03-18 02:50:00 |
    | 6  | 2017-03-18 03:20:00 | 2017-03-18 03:50:00 |
    | 7  | 2017-03-18 05:00:00 | 2017-03-18 05:30:00 |
    | 8  | 2017-03-18 05:10:00 | 2017-03-18 07:00:00 |
    | 9  | 2017-03-18 05:50:00 | 2017-03-18 08:00:00 |

过滤逻辑:从第一个时间段开始,找到其后第一个与它不相交的时间段;然后以找到的时间段为基准,重复同样的逻辑。


过滤后应保留以下记录:

    | Id | StartDate           | EndDate             |
    |----|---------------------|---------------------|
    | 1  | 2017-03-18 00:00:00 | 2017-03-18 02:00:00 |
    | 4  | 2017-03-18 02:10:00 | 2017-03-18 02:30:00 |
    | 5  | 2017-03-18 02:35:00 | 2017-03-18 02:50:00 |
    | 6  | 2017-03-18 03:20:00 | 2017-03-18 03:50:00 |
    | 7  | 2017-03-18 05:00:00 | 2017-03-18 05:30:00 |
    | 9  | 2017-03-18 05:50:00 | 2017-03-18 08:00:00 |


是否可以在不使用 CURSOR 的情况下实现它?

为方便测试,可使用以下建表与插入数据的脚本:

-- Scratch fixture: create the sample #Dates table from the question,
-- load its nine periods, then drop it again.
CREATE TABLE #Dates (
    Id        INT,
    StartDate DATETIME,
    EndDate   DATETIME
);

INSERT INTO #Dates (Id, StartDate, EndDate)
VALUES
    (1, '2017-03-18 00:00:00', '2017-03-18 02:00:00'),
    (2, '2017-03-18 01:30:00', '2017-03-18 04:00:00'),
    (3, '2017-03-18 01:45:00', '2017-03-18 03:00:00'),
    (4, '2017-03-18 02:10:00', '2017-03-18 02:30:00'),
    (5, '2017-03-18 02:35:00', '2017-03-18 02:50:00'),
    (6, '2017-03-18 03:20:00', '2017-03-18 03:50:00'),
    (7, '2017-03-18 05:00:00', '2017-03-18 05:30:00'),
    (8, '2017-03-18 05:10:00', '2017-03-18 07:00:00'),
    (9, '2017-03-18 05:50:00', '2017-03-18 08:00:00');

DROP TABLE #Dates;

是的,可以不使用游标来实现。下面是一个使用 exists 的版本,假设开始日期没有完全相同的重复值:

-- Merge overlapping rows of t into groups ("islands") and report each group's
-- smallest id, earliest startdate and latest enddate.
-- Assumes no two rows share exactly the same startdate.
select min(id) as id,
       min(startdate) as startdate,
       max(enddate) as enddate  -- a group ends at its latest enddate, not its latest startdate
from (-- running total of flag in startdate order assigns one grp number per group
      select t.*, sum(flag) over (order by startdate) as grp
      from (-- flag = 1 when no earlier-starting row still covers this row's startdate
            select t.*,
                   (case when not exists (select 1
                                          from t t2
                                          where t2.startdate < t.startdate and
                                                t2.enddate >= t.startdate
                                         )
                         then 1 else 0
                    end) as flag
            from t
           ) t
      ) t
group by grp;

最里面的子查询计算一个标志以确定行是否重叠。中层累加这个flag来标识每一组,外层做聚合。

从 SQL Server 2012 开始,可以使用 LEAD 和 LAG 函数来实现:

-- Sample data from the question.
CREATE TABLE #Dates (
    Id        INT,
    StartDate DATETIME,
    EndDate   DATETIME
);

INSERT INTO #Dates (Id, StartDate, EndDate)
VALUES
    (1, '2017-03-18 00:00:00', '2017-03-18 02:00:00'),
    (2, '2017-03-18 01:30:00', '2017-03-18 04:00:00'),
    (3, '2017-03-18 01:45:00', '2017-03-18 03:00:00'),
    (4, '2017-03-18 02:10:00', '2017-03-18 02:30:00'),
    (5, '2017-03-18 02:35:00', '2017-03-18 02:50:00'),
    (6, '2017-03-18 03:20:00', '2017-03-18 03:50:00'),
    (7, '2017-03-18 05:00:00', '2017-03-18 05:30:00'),
    (8, '2017-03-18 05:10:00', '2017-03-18 07:00:00'),
    (9, '2017-03-18 05:50:00', '2017-03-18 08:00:00');

-- Attach to each row the previous row's EndDate and the next row's StartDate
-- (both in Id order), then keep a row when it starts after the previous row
-- ends or ends before the next row starts. The IS NULL checks keep the first
-- row (no previous row) and the last row (no next row).
WITH Neighbours AS (
    SELECT
        d.Id,
        d.StartDate,
        d.EndDate,
        LAG(d.EndDate)    OVER (ORDER BY d.Id) AS PrevEnd,
        LEAD(d.StartDate) OVER (ORDER BY d.Id) AS NextStart
    FROM #Dates AS d
)
SELECT
    n.Id,
    n.StartDate,
    n.EndDate
FROM Neighbours AS n
WHERE n.PrevEnd IS NULL
   OR n.NextStart IS NULL
   OR n.StartDate > n.PrevEnd
   OR n.EndDate < n.NextStart;

这是结果:

注意:T1.lagdate IS NULL用于获取第一行,T1.leaddate IS NULL用于获取最后一行

-- Sample data from the question.
CREATE TABLE #Dates (Id INT, StartDate DATETIME, EndDate DATETIME);
INSERT INTO #Dates SELECT 1, '2017-03-18 00:00:00', '2017-03-18 02:00:00';
INSERT INTO #Dates SELECT 2, '2017-03-18 01:30:00', '2017-03-18 04:00:00';
INSERT INTO #Dates SELECT 3, '2017-03-18 01:45:00', '2017-03-18 03:00:00';
INSERT INTO #Dates SELECT 4, '2017-03-18 02:10:00', '2017-03-18 02:30:00';
INSERT INTO #Dates SELECT 5, '2017-03-18 02:35:00', '2017-03-18 02:50:00';
INSERT INTO #Dates SELECT 6, '2017-03-18 03:20:00', '2017-03-18 03:50:00';
INSERT INTO #Dates SELECT 7, '2017-03-18 05:00:00', '2017-03-18 05:30:00';
INSERT INTO #Dates SELECT 8, '2017-03-18 05:10:00', '2017-03-18 07:00:00';
INSERT INTO #Dates SELECT 9, '2017-03-18 05:50:00', '2017-03-18 08:00:00';

-- Accumulates the periods that survive the filter.
DECLARE  @Results TABLE (Id INT, StartDate DATETIME, EndDate DATETIME);

-- Seed the result with the earliest-starting period.
-- No statement may sit between this INSERT and the WHILE below: the loop
-- condition reads the @@ROWCOUNT left by the most recent INSERT.
INSERT INTO @Results (Id, StartDate, EndDate)
SELECT TOP 1 Id, StartDate, EndDate
FROM #Dates
ORDER BY StartDate;

/* Filtering logic: after the first period, find the next period that does not
   intersect it, then repeat the same logic from the period just found. */

WHILE(@@ROWCOUNT > 0)   -- the previous INSERT added a row, so keep searching
BEGIN

    -- Next period to keep: the earliest one that starts strictly after every
    -- period already kept has ended. When none is left the INSERT adds zero
    -- rows, @@ROWCOUNT becomes 0 and the loop stops.
    INSERT INTO @Results (Id, StartDate, EndDate)
    SELECT TOP 1 d.Id, d.StartDate, d.EndDate
    FROM #Dates d
    WHERE d.StartDate =
        (SELECT MIN(StartDate)
         FROM #Dates
         WHERE StartDate > (SELECT MAX(EndDate) FROM @Results));
END

SELECT Id, StartDate, EndDate
FROM @Results
ORDER BY Id;

DROP TABLE #Dates;

Id  StartDate               EndDate
1   2017-03-18 00:00:00.000 2017-03-18 02:00:00.000
4   2017-03-18 02:10:00.000 2017-03-18 02:30:00.000
5   2017-03-18 02:35:00.000 2017-03-18 02:50:00.000
6   2017-03-18 03:20:00.000 2017-03-18 03:50:00.000
7   2017-03-18 05:00:00.000 2017-03-18 05:30:00.000
9   2017-03-18 05:50:00.000 2017-03-18 08:00:00.000

找到的解决方案:

-- Follow the chain of mutually non-overlapping periods in #Dates, starting at Id = 1.
WITH NextPeriod AS (
    -- For each period, find the Id of the earliest-starting period with a
    -- higher Id that does not overlap it (NULL when there is none).
    SELECT
        cur.Id,
        cur.StartDate,
        cur.EndDate,
        nxt.NextId
    FROM #Dates AS cur
    OUTER APPLY (
        SELECT TOP 1 cand.Id AS NextId
        FROM #Dates AS cand
        WHERE cand.Id > cur.Id
          AND (cur.StartDate > cand.EndDate OR cur.EndDate < cand.StartDate)
        ORDER BY cand.StartDate
    ) AS nxt
), Chain AS (
    -- Anchor: the period with Id = 1.
    SELECT seed.Id, seed.StartDate, seed.EndDate, seed.NextId
    FROM NextPeriod AS seed
    WHERE seed.Id = 1
    UNION ALL
    -- Recursive step: hop to the period the previous link points at.
    SELECT step.Id, step.StartDate, step.EndDate, step.NextId
    FROM NextPeriod AS step
    INNER JOIN Chain AS prev
        ON prev.NextId = step.Id
)
SELECT Id, StartDate, EndDate, NextId
FROM Chain;