SQL 连续日期累计计数
SQL Consecutive Date Cumulative Count
我正在进行一些花名册分析,需要确定员工何时连续工作 5 天或更长时间。在我的 table 中,我可以提取如下数据(注意,还有很多列,这只是一个缩减的示例):
Emp
Start
First_Entry
1234
23/06/2016
1
1234
24/06/2016
1
1234
24/06/2016
0
1234
25/06/2016
1
1234
26/06/2016
1
1234
27/06/2016
1
1234
28/06/2016
1
1234
29/06/2016
1
1234
29/06/2016
0
1234
30/06/2016
1
1234
2/07/2016
1
1234
3/07/2016
1
1234
3/07/2016
0
1234
4/07/2016
1
1234
4/07/2016
0
1234
5/07/2016
1
1234
6/07/2016
1
1234
9/07/2016
1
1234
10/07/2016
1
1234
11/07/2016
1
1234
12/07/2016
1
而我所追求的是这样的:
Emp
Start
First_Entry
Consecutive_Days
Over_5
Status
1234
23/06/2016
1
1
0
Worked < 5
1234
24/06/2016
1
2
0
Worked < 5
1234
24/06/2016
0
2
0
Worked < 5
1234
25/06/2016
1
3
0
Worked < 5
1234
26/06/2016
1
4
0
Worked < 5
1234
27/06/2016
1
5
1
Worked >= 5
1234
28/06/2016
1
6
1
Worked >= 5
1234
29/06/2016
1
7
1
Worked >= 5
1234
29/06/2016
0
7
1
Worked >= 5
1234
30/06/2016
1
8
1
Worked >= 5
1234
02/07/2016
1
1
0
Worked < 5
1234
03/07/2016
1
2
0
Worked < 5
1234
03/07/2016
0
2
0
Worked < 5
1234
04/07/2016
1
3
0
Worked < 5
1234
04/07/2016
0
3
0
Worked < 5
1234
05/07/2016
1
4
0
Worked < 5
1234
06/07/2016
1
5
1
Worked >= 5
1234
09/07/2016
1
1
0
Worked < 5
1234
10/07/2016
1
2
0
Worked < 5
1234
11/07/2016
1
3
0
Worked < 5
1234
12/07/2016
1
4
0
Worked < 5
我真的不知道如何获取连续几天的累计计数,所以你能提供的任何帮助都会很棒
可能有人会想出一个绝妙的解决方案,但这就可以了。您的问题看起来像“差距和孤岛”问题。找到日期范围的岛屿,我们可以很容易地找出其余的。在下面SQL中,@mindate 不是必须的,但更容易。
CREATE TABLE #temptable
(
[Emp] CHAR(4),
[startDate] DATE,
[First_Entry] BIT
);
INSERT INTO #temptable
(
[Emp],
[startDate],
[First_Entry]
)
VALUES
('1234', N'2016-06-23', 1),
('1234', N'2016-06-24', 1),
('1234', N'2016-06-24', 0),
('1234', N'2016-06-25', 1),
('1234', N'2016-06-26', 1),
('1234', N'2016-06-27', 1),
('1234', N'2016-06-28', 1),
('1234', N'2016-06-29', 1),
('1234', N'2016-06-29', 0),
('1234', N'2016-06-30', 1),
('1234', N'2016-07-02', 1),
('1234', N'2016-07-03', 1),
('1234', N'2016-07-03', 0),
('1234', N'2016-07-04', 1),
('1234', N'2016-07-04', 0),
('1234', N'2016-07-05', 1),
('1234', N'2016-07-06', 1),
('1234', N'2016-07-09', 1),
('1234', N'2016-07-10', 1),
('1234', N'2016-07-11', 1),
('1234', N'2016-07-12', 1);
DECLARE @minDate DATE;
SELECT @minDate = DATEADD(d, -1, MIN(startDate))
FROM #temptable;
WITH firstOnly
AS (SELECT *
FROM #temptable
WHERE First_Entry = 1),
grouper (emp, startDate, grp)
AS (SELECT Emp,
startDate,
DATEDIFF(d, @minDate, startDate) - ROW_NUMBER() OVER (PARTITION BY Emp ORDER BY startDate)
FROM firstOnly),
islands (emp, START, [end])
AS (SELECT emp,
MIN(startDate),
MAX(startDate)
FROM grouper
GROUP BY emp,
grp),
consecutives (emp, startDate, consecutive_days)
AS (SELECT f.Emp,
f.startDate,
-- i.START,
-- i.[end],
ROW_NUMBER() OVER (PARTITION BY f.Emp, i.START ORDER BY i.START)
FROM firstOnly f
INNER JOIN islands i
ON f.startDate
BETWEEN i.START AND i.[end])
SELECT t.Emp,
t.startDate,
t.First_Entry,
c.consecutive_days,
CAST(CASE
WHEN c.consecutive_days < 5 THEN
0
ELSE
1
END AS BIT) Over_5,
CASE
WHEN c.consecutive_days < 5 THEN
'Worked < 5'
ELSE
'Worked >= 5'
END [Status]
FROM consecutives c
INNER JOIN #temptable t
ON t.Emp = c.emp
AND t.startDate = c.startDate;
DROP TABLE #temptable;
这是一个孤岛和间隙问题,你可以尝试使用LAG
window函数为每个Emp
获取前startDate
行,十个使用SUM
window函数计算哪些天是连续的。
最后,我们可以用CASE WHEN
表达式判断天是否大于5
;WITH CTE AS (
SELECT [Emp],
[startDate],
[First_Entry],
SUM(CASE WHEN DATEDIFF(dd,f_Dt,startDate) <= 1 THEN 0 ELSE 1 END) OVER(PARTITION BY Emp ORDER BY startDate) grp
FROM (
SELECT *,
LAG(startDate,1,startDate) OVER(PARTITION BY Emp ORDER BY startDate) f_Dt
FROM T
) t1
)
SELECT [Emp],
[startDate],
[First_Entry],
SUM(CASE WHEN First_Entry = 1 THEN 1 ELSE 0 END) OVER(PARTITION BY Emp,grp ORDER BY startDate) Consecutive_Days,
(CASE WHEN SUM(CASE WHEN First_Entry = 1 THEN 1 ELSE 0 END) OVER(PARTITION BY Emp,grp ORDER BY startDate) >= 5 THEN 1 ELSE 0 END) Over_5,
(CASE WHEN SUM(CASE WHEN First_Entry = 1 THEN 1 ELSE 0 END) OVER(PARTITION BY Emp,grp ORDER BY startDate) >= 5 THEN 'Worked >= 5' ELSE 'Worked < 5' END) Status
FROM CTE
我正在进行一些花名册分析,需要确定员工何时连续工作 5 天或更长时间。在我的 table 中,我可以提取如下数据(注意,还有很多列,这只是一个缩减的示例):
Emp | Start | First_Entry |
---|---|---|
1234 | 23/06/2016 | 1 |
1234 | 24/06/2016 | 1 |
1234 | 24/06/2016 | 0 |
1234 | 25/06/2016 | 1 |
1234 | 26/06/2016 | 1 |
1234 | 27/06/2016 | 1 |
1234 | 28/06/2016 | 1 |
1234 | 29/06/2016 | 1 |
1234 | 29/06/2016 | 0 |
1234 | 30/06/2016 | 1 |
1234 | 2/07/2016 | 1 |
1234 | 3/07/2016 | 1 |
1234 | 3/07/2016 | 0 |
1234 | 4/07/2016 | 1 |
1234 | 4/07/2016 | 0 |
1234 | 5/07/2016 | 1 |
1234 | 6/07/2016 | 1 |
1234 | 9/07/2016 | 1 |
1234 | 10/07/2016 | 1 |
1234 | 11/07/2016 | 1 |
1234 | 12/07/2016 | 1 |
而我所追求的是这样的:
Emp | Start | First_Entry | Consecutive_Days | Over_5 | Status |
---|---|---|---|---|---|
1234 | 23/06/2016 | 1 | 1 | 0 | Worked < 5 |
1234 | 24/06/2016 | 1 | 2 | 0 | Worked < 5 |
1234 | 24/06/2016 | 0 | 2 | 0 | Worked < 5 |
1234 | 25/06/2016 | 1 | 3 | 0 | Worked < 5 |
1234 | 26/06/2016 | 1 | 4 | 0 | Worked < 5 |
1234 | 27/06/2016 | 1 | 5 | 1 | Worked >= 5 |
1234 | 28/06/2016 | 1 | 6 | 1 | Worked >= 5 |
1234 | 29/06/2016 | 1 | 7 | 1 | Worked >= 5 |
1234 | 29/06/2016 | 0 | 7 | 1 | Worked >= 5 |
1234 | 30/06/2016 | 1 | 8 | 1 | Worked >= 5 |
1234 | 02/07/2016 | 1 | 1 | 0 | Worked < 5 |
1234 | 03/07/2016 | 1 | 2 | 0 | Worked < 5 |
1234 | 03/07/2016 | 0 | 2 | 0 | Worked < 5 |
1234 | 04/07/2016 | 1 | 3 | 0 | Worked < 5 |
1234 | 04/07/2016 | 0 | 3 | 0 | Worked < 5 |
1234 | 05/07/2016 | 1 | 4 | 0 | Worked < 5 |
1234 | 06/07/2016 | 1 | 5 | 1 | Worked >= 5 |
1234 | 09/07/2016 | 1 | 1 | 0 | Worked < 5 |
1234 | 10/07/2016 | 1 | 2 | 0 | Worked < 5 |
1234 | 11/07/2016 | 1 | 3 | 0 | Worked < 5 |
1234 | 12/07/2016 | 1 | 4 | 0 | Worked < 5 |
我真的不知道如何获取连续几天的累计计数,所以你能提供的任何帮助都会很棒
可能有人会想出一个绝妙的解决方案,但这就可以了。您的问题看起来像“差距和孤岛”问题。找到日期范围的岛屿,我们可以很容易地找出其余的。在下面SQL中,@mindate 不是必须的,但更容易。
CREATE TABLE #temptable
(
[Emp] CHAR(4),
[startDate] DATE,
[First_Entry] BIT
);
INSERT INTO #temptable
(
[Emp],
[startDate],
[First_Entry]
)
VALUES
('1234', N'2016-06-23', 1),
('1234', N'2016-06-24', 1),
('1234', N'2016-06-24', 0),
('1234', N'2016-06-25', 1),
('1234', N'2016-06-26', 1),
('1234', N'2016-06-27', 1),
('1234', N'2016-06-28', 1),
('1234', N'2016-06-29', 1),
('1234', N'2016-06-29', 0),
('1234', N'2016-06-30', 1),
('1234', N'2016-07-02', 1),
('1234', N'2016-07-03', 1),
('1234', N'2016-07-03', 0),
('1234', N'2016-07-04', 1),
('1234', N'2016-07-04', 0),
('1234', N'2016-07-05', 1),
('1234', N'2016-07-06', 1),
('1234', N'2016-07-09', 1),
('1234', N'2016-07-10', 1),
('1234', N'2016-07-11', 1),
('1234', N'2016-07-12', 1);
DECLARE @minDate DATE;
SELECT @minDate = DATEADD(d, -1, MIN(startDate))
FROM #temptable;
WITH firstOnly
AS (SELECT *
FROM #temptable
WHERE First_Entry = 1),
grouper (emp, startDate, grp)
AS (SELECT Emp,
startDate,
DATEDIFF(d, @minDate, startDate) - ROW_NUMBER() OVER (PARTITION BY Emp ORDER BY startDate)
FROM firstOnly),
islands (emp, START, [end])
AS (SELECT emp,
MIN(startDate),
MAX(startDate)
FROM grouper
GROUP BY emp,
grp),
consecutives (emp, startDate, consecutive_days)
AS (SELECT f.Emp,
f.startDate,
-- i.START,
-- i.[end],
ROW_NUMBER() OVER (PARTITION BY f.Emp, i.START ORDER BY i.START)
FROM firstOnly f
INNER JOIN islands i
ON f.startDate
BETWEEN i.START AND i.[end])
SELECT t.Emp,
t.startDate,
t.First_Entry,
c.consecutive_days,
CAST(CASE
WHEN c.consecutive_days < 5 THEN
0
ELSE
1
END AS BIT) Over_5,
CASE
WHEN c.consecutive_days < 5 THEN
'Worked < 5'
ELSE
'Worked >= 5'
END [Status]
FROM consecutives c
INNER JOIN #temptable t
ON t.Emp = c.emp
AND t.startDate = c.startDate;
DROP TABLE #temptable;
这是一个孤岛和间隙问题,你可以尝试使用LAG
window函数为每个Emp
获取前startDate
行,十个使用SUM
window函数计算哪些天是连续的。
最后,我们可以用CASE WHEN
表达式判断天是否大于5
;WITH CTE AS (
SELECT [Emp],
[startDate],
[First_Entry],
SUM(CASE WHEN DATEDIFF(dd,f_Dt,startDate) <= 1 THEN 0 ELSE 1 END) OVER(PARTITION BY Emp ORDER BY startDate) grp
FROM (
SELECT *,
LAG(startDate,1,startDate) OVER(PARTITION BY Emp ORDER BY startDate) f_Dt
FROM T
) t1
)
SELECT [Emp],
[startDate],
[First_Entry],
SUM(CASE WHEN First_Entry = 1 THEN 1 ELSE 0 END) OVER(PARTITION BY Emp,grp ORDER BY startDate) Consecutive_Days,
(CASE WHEN SUM(CASE WHEN First_Entry = 1 THEN 1 ELSE 0 END) OVER(PARTITION BY Emp,grp ORDER BY startDate) >= 5 THEN 1 ELSE 0 END) Over_5,
(CASE WHEN SUM(CASE WHEN First_Entry = 1 THEN 1 ELSE 0 END) OVER(PARTITION BY Emp,grp ORDER BY startDate) >= 5 THEN 'Worked >= 5' ELSE 'Worked < 5' END) Status
FROM CTE