SQL 连续日期累计计数

SQL Consecutive Date Cumulative Count

我正在做一些排班(花名册)分析,需要确定员工何时连续工作了 5 天或更长时间。我可以从表中提取出如下数据(注意,实际还有很多其他列,这只是一个精简后的示例):

Emp Start First_Entry
1234 23/06/2016 1
1234 24/06/2016 1
1234 24/06/2016 0
1234 25/06/2016 1
1234 26/06/2016 1
1234 27/06/2016 1
1234 28/06/2016 1
1234 29/06/2016 1
1234 29/06/2016 0
1234 30/06/2016 1
1234 2/07/2016 1
1234 3/07/2016 1
1234 3/07/2016 0
1234 4/07/2016 1
1234 4/07/2016 0
1234 5/07/2016 1
1234 6/07/2016 1
1234 9/07/2016 1
1234 10/07/2016 1
1234 11/07/2016 1
1234 12/07/2016 1

而我想要得到的结果是这样的:

Emp Start First_Entry Consecutive_Days Over_5 Status
1234 23/06/2016 1 1 0 Worked < 5
1234 24/06/2016 1 2 0 Worked < 5
1234 24/06/2016 0 2 0 Worked < 5
1234 25/06/2016 1 3 0 Worked < 5
1234 26/06/2016 1 4 0 Worked < 5
1234 27/06/2016 1 5 1 Worked >= 5
1234 28/06/2016 1 6 1 Worked >= 5
1234 29/06/2016 1 7 1 Worked >= 5
1234 29/06/2016 0 7 1 Worked >= 5
1234 30/06/2016 1 8 1 Worked >= 5
1234 02/07/2016 1 1 0 Worked < 5
1234 03/07/2016 1 2 0 Worked < 5
1234 03/07/2016 0 2 0 Worked < 5
1234 04/07/2016 1 3 0 Worked < 5
1234 04/07/2016 0 3 0 Worked < 5
1234 05/07/2016 1 4 0 Worked < 5
1234 06/07/2016 1 5 1 Worked >= 5
1234 09/07/2016 1 1 0 Worked < 5
1234 10/07/2016 1 2 0 Worked < 5
1234 11/07/2016 1 3 0 Worked < 5
1234 12/07/2016 1 4 0 Worked < 5

我真的不知道如何获取连续几天的累计计数,所以你能提供的任何帮助都会很棒

也许有人能想出更巧妙的解决方案,但下面这个方法已经够用了。您的问题属于典型的“间隙与孤岛”(gaps and islands)问题:只要先找出由连续日期构成的各个“孤岛”,其余部分就很容易计算出来。在下面的 SQL 中,@minDate 并不是必需的,但使用它会让写法更简单。

-- Session-scoped sample roster: one row per shift entry.
-- First_Entry = 1 marks the first entry of a day, 0 marks an additional entry
-- on the same day.
CREATE TABLE #temptable (Emp CHAR(4), startDate DATE, First_Entry BIT);

-- Three runs of consecutive days for employee 1234:
-- 23 Jun - 30 Jun, 2 Jul - 6 Jul and 9 Jul - 12 Jul 2016.
INSERT INTO #temptable (Emp, startDate, First_Entry)
VALUES
    ('1234', N'2016-06-23', 1),
    ('1234', N'2016-06-24', 1),
    ('1234', N'2016-06-24', 0),
    ('1234', N'2016-06-25', 1),
    ('1234', N'2016-06-26', 1),
    ('1234', N'2016-06-27', 1),
    ('1234', N'2016-06-28', 1),
    ('1234', N'2016-06-29', 1),
    ('1234', N'2016-06-29', 0),
    ('1234', N'2016-06-30', 1),
    ('1234', N'2016-07-02', 1),
    ('1234', N'2016-07-03', 1),
    ('1234', N'2016-07-03', 0),
    ('1234', N'2016-07-04', 1),
    ('1234', N'2016-07-04', 0),
    ('1234', N'2016-07-05', 1),
    ('1234', N'2016-07-06', 1),
    ('1234', N'2016-07-09', 1),
    ('1234', N'2016-07-10', 1),
    ('1234', N'2016-07-11', 1),
    ('1234', N'2016-07-12', 1);


-- Anchor date: one day before the earliest shift. It only serves to turn each
-- startDate into a day number that can be compared with a row number; any
-- fixed date would work equally well.
DECLARE @minDate DATE;
SELECT @minDate = DATEADD(d, -1, MIN(startDate))
FROM #temptable;

WITH firstOnly
AS (
    -- One row per worked day: only the first entry of each day counts.
    -- NOTE(review): assumes at most one First_Entry = 1 row per employee per
    -- day; duplicates would break the grouping below - confirm against real data.
    SELECT Emp,
           startDate
    FROM #temptable
    WHERE First_Entry = 1),
     grouper (emp, startDate, grp)
AS (
    -- Inside a run of consecutive days the day number and the row number both
    -- grow by one per row, so their difference is constant for the whole run
    -- and changes whenever a day is skipped.
    SELECT Emp,
           startDate,
           DATEDIFF(d, @minDate, startDate) - ROW_NUMBER() OVER (PARTITION BY Emp ORDER BY startDate)
    FROM firstOnly),
     islands (emp, START, [end])
AS (
    -- Collapse every run into its first and last day.
    SELECT emp,
           MIN(startDate),
           MAX(startDate)
    FROM grouper
    GROUP BY emp,
             grp),
     consecutives (emp, startDate, consecutive_days)
AS (
    -- Number the days inside each island. The numbering is ordered by the work
    -- day itself (ordering by the island start would tie on every row and make
    -- the numbers arbitrary), and the join also matches on the employee so one
    -- employee's days never land in another employee's island.
    SELECT f.Emp,
           f.startDate,
           ROW_NUMBER() OVER (PARTITION BY f.Emp, i.START ORDER BY f.startDate)
    FROM firstOnly f
        INNER JOIN islands i
            ON f.Emp = i.emp
               AND f.startDate
               BETWEEN i.START AND i.[end])
-- Join back to the full table so that the additional entries of a day
-- (First_Entry = 0) receive the same day count as the first entry of that day.
SELECT t.Emp,
       t.startDate,
       t.First_Entry,
       c.consecutive_days,
       CAST(CASE
                WHEN c.consecutive_days < 5 THEN
                    0
                ELSE
                    1
            END AS BIT) Over_5,
       CASE
           WHEN c.consecutive_days < 5 THEN
               'Worked < 5'
           ELSE
               'Worked >= 5'
       END [Status]
FROM consecutives c
    INNER JOIN #temptable t
        ON t.Emp = c.emp
           AND t.startDate = c.startDate
-- Without an explicit ORDER BY the row order of the result is not guaranteed.
ORDER BY t.Emp,
         t.startDate,
         t.First_Entry DESC;

DROP TABLE #temptable;

这是一个“孤岛和间隙”问题。你可以先使用 LAG 窗口函数为每个 Emp 取得上一行的 startDate,然后使用 SUM 窗口函数计算哪些天是连续的。

最后,我们可以用 CASE WHEN 表达式判断连续工作的天数是否大于或等于 5。

;WITH with_prev AS (
    -- Pair every row with the previous startDate of the same employee; the
    -- first row of an employee has no predecessor and falls back to its own date.
    SELECT
        Emp,
        startDate,
        First_Entry,
        LAG(startDate, 1, startDate) OVER (PARTITION BY Emp ORDER BY startDate) AS prev_start
    FROM T
),
CTE AS (
    -- grp is a running count of the gaps seen so far: it goes up by one each
    -- time the previous date is more than one day back, so every run of
    -- consecutive days shares one grp value.
    SELECT
        Emp,
        startDate,
        First_Entry,
        SUM(CASE WHEN DATEDIFF(DAY, prev_start, startDate) <= 1 THEN 0 ELSE 1 END)
            OVER (PARTITION BY Emp
                  ORDER BY startDate
                  RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS grp
    FROM with_prev
),
counted AS (
    -- Running number of first entries inside each run. The RANGE frame (the
    -- default when ORDER BY is given) treats rows with the same startDate as
    -- peers, so every entry of a day gets the same count.
    SELECT
        Emp,
        startDate,
        First_Entry,
        SUM(CASE WHEN First_Entry = 1 THEN 1 ELSE 0 END)
            OVER (PARTITION BY Emp, grp
                  ORDER BY startDate
                  RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS Consecutive_Days
    FROM CTE
)
SELECT
    Emp,
    startDate,
    First_Entry,
    Consecutive_Days,
    CASE WHEN Consecutive_Days >= 5 THEN 1 ELSE 0 END AS Over_5,
    CASE WHEN Consecutive_Days >= 5 THEN 'Worked >= 5' ELSE 'Worked < 5' END AS Status
FROM counted;

sqlfiddle