使用 PARTITION BY 子句的查询:如何按窗口(连续区段)分组
Query and Partition By clause group by window
我有以下代码
-- Sample data for the question: one row per (id, Date) carrying a Status.
-- Statements are explicitly terminated so that a CTE (WITH ...) can follow
-- in the same batch, and the INSERT column list uses the declared spelling
-- "id" so the script also runs under a case-sensitive collation.
DECLARE @test TABLE (
    id       INT,
    [Status] INT,
    [Date]   DATE
);

INSERT INTO @test (id, [Status], [Date])
VALUES
    (1, 1, '2018-01-01'),
    (2, 1, '2018-01-01'),
    (1, 1, '2017-11-01'),
    (1, 2, '2017-10-01'),
    (1, 1, '2017-09-01'),
    (2, 2, '2017-01-01'),
    (1, 1, '2017-08-01'),
    (1, 1, '2017-07-01'),
    (1, 1, '2017-06-01'),
    (1, 2, '2017-05-01'),
    (1, 1, '2017-04-01'),
    (1, 1, '2017-03-01'),
    (1, 1, '2017-01-01');
-- Asker's original attempt: running MIN / MAX / COUNT over each (id, Status)
-- partition, ordered by date. Window functions do not collapse rows, so this
-- returns one row per input row, and the partition spans every row with the
-- same (id, Status) regardless of status changes in between.
SELECT
    t.id,
    t.[Status],
    MIN(t.[Date]) OVER (
        PARTITION BY t.id, t.[Status]
        ORDER BY t.[Date], t.id, t.[Status]
    ) AS WindowStart,
    MAX(t.[Date]) OVER (
        PARTITION BY t.id, t.[Status]
        ORDER BY t.[Date], t.id, t.[Status]
    ) AS WindowEnd,
    COUNT(*) OVER (
        PARTITION BY t.id, t.[Status]
        ORDER BY t.[Date], t.id, t.[Status]
    ) AS total
FROM @test AS t;
但结果是这样的:
id Status WindowStart WindowEnd total
1 1 2017-01-01 2017-01-01 1
1 1 2017-01-01 2017-03-01 2
1 1 2017-01-01 2017-04-01 3
1 1 2017-01-01 2017-06-01 4
1 1 2017-01-01 2017-07-01 5
1 1 2017-01-01 2017-08-01 6
1 1 2017-01-01 2017-09-01 7
1 1 2017-01-01 2017-11-01 8
1 1 2017-01-01 2018-01-01 9
1 2 2017-05-01 2017-05-01 1
1 2 2017-05-01 2017-10-01 2
2 1 2018-01-01 2018-01-01 1
2 2 2017-01-01 2017-01-01 1
我需要像下面这样按窗口(状态连续不变的区段)分组:
id Status WindowStart WindowEnd total
1 1 2017-01-01 2017-04-01 3
1 2 2017-05-01 2017-05-01 1
1 1 2017-06-01 2017-09-01 4
1 2 2017-10-01 2017-10-01 1
1 1 2017-11-01 2018-01-01 2
2 1 2018-01-01 2018-01-01 1
2 2 2017-01-01 2017-01-01 1
id = 1、Status = 1 的第一组应该在第一条 Status = 2 的行(2017-05-01)之前结束,所以总数是 3;然后从 2017-06-01 重新开始,到 2017-09-01 为止,共 4 行。
如何才能做到这一点?
这是一个经典的“间隙与孤岛”(gaps and islands)问题,网上大概能找到上千个相关的解答。
下面的查询可以得到您想要的结果,不过下次提问前请先多做一些调查研究。 :)
-- Gaps-and-islands: collapse every run of consecutive rows (ordered by [Date]
-- within an id) that share the same [Status] into a single row holding the
-- run's first date, last date and row count.
-- NOTE(review): assumes [Date] is unique per id; tied dates would make the
-- ROW_NUMBER() ordering, and therefore the islands, nondeterministic.
-- The leading semicolon terminates any preceding statement, which T-SQL
-- requires before a common table expression.
;WITH Groups AS (
    SELECT
        t.id,
        t.[Status],
        t.[Date],
        -- Position within the id minus position within (id, Status): the
        -- difference stays constant for as long as the status does not
        -- change, so (id, Status, Grp) identifies one island.
        ROW_NUMBER() OVER (PARTITION BY t.id ORDER BY t.[Date])
        - ROW_NUMBER() OVER (PARTITION BY t.id, t.[Status] ORDER BY t.[Date]) AS Grp
    FROM @test AS t
)
SELECT
    G.id,
    G.[Status],
    MIN(G.[Date]) AS WindowStart,
    MAX(G.[Date]) AS WindowEnd,   -- alias was misspelled "WindowsEnd"
    COUNT(*)      AS Total
FROM Groups AS G
GROUP BY
    G.id,
    G.[Status],
    G.Grp
ORDER BY
    G.id,
    WindowStart;
请注意,此解决方案中最后两行的顺序与您的预期结果相反;在您的预期结果中,id 1 似乎是按日期升序(ASCENDING)排列的,而 id 2 却是按降序(DESCENDING)排列的。
下面是使用 LAG 函数的另一种方法:
-- Gaps-and-islands via LAG: flag each row whose status differs from the
-- previous row of the same id, turn the flags into a running island number,
-- then aggregate per island.
-- NOTE(review): assumes [Date] is unique per id; with tied dates the LAG
-- ordering is nondeterministic.
;WITH flagged AS (
    SELECT
        t.id,
        t.[Status],
        t.[Date],
        -- Status of the previous row (by date) for the same id; NULL on the
        -- first row of each id.
        LAG(t.[Status]) OVER (PARTITION BY t.id ORDER BY t.[Date]) AS prev_val
    FROM @test AS t
),
cte AS (
    SELECT
        f.id,
        f.[Status],
        f.[Date],
        -- Running count of status changes. A NULL prev_val falls into ELSE 1,
        -- so the first row of each id opens island 1. The explicit ROWS frame
        -- replaces the implicit RANGE frame, which would treat rows with
        -- equal dates as peers and add their flags together.
        SUM(CASE WHEN f.prev_val = f.[Status] THEN 0 ELSE 1 END)
            OVER (PARTITION BY f.id
                  ORDER BY f.[Date]
                  ROWS UNBOUNDED PRECEDING) AS grp
    FROM flagged AS f
)
SELECT
    c.id,
    c.[Status],
    MIN(c.[Date]) AS WindowStart,
    MAX(c.[Date]) AS WindowEnd,
    COUNT(*)      AS Total
FROM cte AS c
GROUP BY
    c.id,
    c.[Status],
    c.grp
-- Without an ORDER BY the row order of the result is not guaranteed.
ORDER BY
    c.id,
    WindowStart;
先用 LAG 函数找出每一行(按日期排序)的前一个状态,然后用 SUM() OVER() 生成组号:只有当状态发生变化时,该数字才会递增。
我有以下代码
-- Sample data for the question: one row per (id, Date) carrying a Status.
-- Statements are explicitly terminated so that a CTE (WITH ...) can follow
-- in the same batch, and the INSERT column list uses the declared spelling
-- "id" so the script also runs under a case-sensitive collation.
DECLARE @test TABLE (
    id       INT,
    [Status] INT,
    [Date]   DATE
);

INSERT INTO @test (id, [Status], [Date])
VALUES
    (1, 1, '2018-01-01'),
    (2, 1, '2018-01-01'),
    (1, 1, '2017-11-01'),
    (1, 2, '2017-10-01'),
    (1, 1, '2017-09-01'),
    (2, 2, '2017-01-01'),
    (1, 1, '2017-08-01'),
    (1, 1, '2017-07-01'),
    (1, 1, '2017-06-01'),
    (1, 2, '2017-05-01'),
    (1, 1, '2017-04-01'),
    (1, 1, '2017-03-01'),
    (1, 1, '2017-01-01');
-- Asker's original attempt: running MIN / MAX / COUNT over each (id, Status)
-- partition, ordered by date. Window functions do not collapse rows, so this
-- returns one row per input row, and the partition spans every row with the
-- same (id, Status) regardless of status changes in between.
SELECT
    t.id,
    t.[Status],
    MIN(t.[Date]) OVER (
        PARTITION BY t.id, t.[Status]
        ORDER BY t.[Date], t.id, t.[Status]
    ) AS WindowStart,
    MAX(t.[Date]) OVER (
        PARTITION BY t.id, t.[Status]
        ORDER BY t.[Date], t.id, t.[Status]
    ) AS WindowEnd,
    COUNT(*) OVER (
        PARTITION BY t.id, t.[Status]
        ORDER BY t.[Date], t.id, t.[Status]
    ) AS total
FROM @test AS t;
但结果是这样的:
id Status WindowStart WindowEnd total
1 1 2017-01-01 2017-01-01 1
1 1 2017-01-01 2017-03-01 2
1 1 2017-01-01 2017-04-01 3
1 1 2017-01-01 2017-06-01 4
1 1 2017-01-01 2017-07-01 5
1 1 2017-01-01 2017-08-01 6
1 1 2017-01-01 2017-09-01 7
1 1 2017-01-01 2017-11-01 8
1 1 2017-01-01 2018-01-01 9
1 2 2017-05-01 2017-05-01 1
1 2 2017-05-01 2017-10-01 2
2 1 2018-01-01 2018-01-01 1
2 2 2017-01-01 2017-01-01 1
我需要像下面这样按窗口(状态连续不变的区段)分组:
id Status WindowStart WindowEnd total
1 1 2017-01-01 2017-04-01 3
1 2 2017-05-01 2017-05-01 1
1 1 2017-06-01 2017-09-01 4
1 2 2017-10-01 2017-10-01 1
1 1 2017-11-01 2018-01-01 2
2 1 2018-01-01 2018-01-01 1
2 2 2017-01-01 2017-01-01 1
id = 1、Status = 1 的第一组应该在第一条 Status = 2 的行(2017-05-01)之前结束,所以总数是 3;然后从 2017-06-01 重新开始,到 2017-09-01 为止,共 4 行。
如何才能做到这一点?
这是一个经典的“间隙与孤岛”(gaps and islands)问题,网上大概能找到上千个相关的解答。
下面的查询可以得到您想要的结果,不过下次提问前请先多做一些调查研究。 :)
-- Gaps-and-islands: collapse every run of consecutive rows (ordered by [Date]
-- within an id) that share the same [Status] into a single row holding the
-- run's first date, last date and row count.
-- NOTE(review): assumes [Date] is unique per id; tied dates would make the
-- ROW_NUMBER() ordering, and therefore the islands, nondeterministic.
-- The leading semicolon terminates any preceding statement, which T-SQL
-- requires before a common table expression.
;WITH Groups AS (
    SELECT
        t.id,
        t.[Status],
        t.[Date],
        -- Position within the id minus position within (id, Status): the
        -- difference stays constant for as long as the status does not
        -- change, so (id, Status, Grp) identifies one island.
        ROW_NUMBER() OVER (PARTITION BY t.id ORDER BY t.[Date])
        - ROW_NUMBER() OVER (PARTITION BY t.id, t.[Status] ORDER BY t.[Date]) AS Grp
    FROM @test AS t
)
SELECT
    G.id,
    G.[Status],
    MIN(G.[Date]) AS WindowStart,
    MAX(G.[Date]) AS WindowEnd,   -- alias was misspelled "WindowsEnd"
    COUNT(*)      AS Total
FROM Groups AS G
GROUP BY
    G.id,
    G.[Status],
    G.Grp
ORDER BY
    G.id,
    WindowStart;
请注意,此解决方案中最后两行的顺序与您的预期结果相反;在您的预期结果中,id 1 似乎是按日期升序(ASCENDING)排列的,而 id 2 却是按降序(DESCENDING)排列的。
下面是使用 LAG 函数的另一种方法:
-- Gaps-and-islands via LAG: flag each row whose status differs from the
-- previous row of the same id, turn the flags into a running island number,
-- then aggregate per island.
-- NOTE(review): assumes [Date] is unique per id; with tied dates the LAG
-- ordering is nondeterministic.
;WITH flagged AS (
    SELECT
        t.id,
        t.[Status],
        t.[Date],
        -- Status of the previous row (by date) for the same id; NULL on the
        -- first row of each id.
        LAG(t.[Status]) OVER (PARTITION BY t.id ORDER BY t.[Date]) AS prev_val
    FROM @test AS t
),
cte AS (
    SELECT
        f.id,
        f.[Status],
        f.[Date],
        -- Running count of status changes. A NULL prev_val falls into ELSE 1,
        -- so the first row of each id opens island 1. The explicit ROWS frame
        -- replaces the implicit RANGE frame, which would treat rows with
        -- equal dates as peers and add their flags together.
        SUM(CASE WHEN f.prev_val = f.[Status] THEN 0 ELSE 1 END)
            OVER (PARTITION BY f.id
                  ORDER BY f.[Date]
                  ROWS UNBOUNDED PRECEDING) AS grp
    FROM flagged AS f
)
SELECT
    c.id,
    c.[Status],
    MIN(c.[Date]) AS WindowStart,
    MAX(c.[Date]) AS WindowEnd,
    COUNT(*)      AS Total
FROM cte AS c
GROUP BY
    c.id,
    c.[Status],
    c.grp
-- Without an ORDER BY the row order of the result is not guaranteed.
ORDER BY
    c.id,
    WindowStart;
先用 LAG 函数找出每一行(按日期排序)的前一个状态,然后用 SUM() OVER() 生成组号:只有当状态发生变化时,该数字才会递增。