TSQL - 仅对连续行进行 GROUP BY
TSQL - GROUP BY on continous rows only
我正在开发 SQL Server 2017 (v14.0)。
我有一个 table 这样的:
Key | State | from | until |
----+----------+------------+------------+
100 | open | 01.01.2021 | 01.01.2021 |
100 | open | 02.01.2021 | 02.01.2021 |
100 | closed | 03.01.2021 | 13.01.2021 |
100 | open | 14.01.2021 | 20.01.2021 |
100 | open | 20.01.2021 | 30.01.2021 |
我想按 Key
和 State
对其进行分组,但仅限于连续行。
所以我的预期结果是这样的:
Key | State | from | until |
----+----------+------------+------------+
100 | open | 01.01.2021 | 02.01.2021 |
100 | closed | 03.01.2021 | 13.01.2021 |
100 | open | 14.01.2021 | 30.01.2021 |
知道怎么做吗?我有一种强烈的感觉,在 ROW_NUMBER
的帮助下这应该是可能的,但我还没有弄清楚......
(在这个示例数据中,一些奇怪的 group by calendarweek
或类似的东西可能是可能的,但这不是我的意图)
这是一个缺口和孤岛问题。一种解决方案是:
WITH cte1 AS (
SELECT *, CASE WHEN LAG([state]) OVER (PARTITION BY [key] ORDER BY [from]) = [state] THEN 0 ELSE 1 END AS chg
FROM t
), cte2 AS (
SELECT *, SUM(chg) OVER (PARTITION BY [key] ORDER BY [from]) AS grp
FROM cte1
)
SELECT [key], grp, MIN([state]), MIN([from]), MAX([until])
FROM cte2
GROUP BY [key], grp
ORDER BY [key], grp
一种可能如下补充:
Create table myTable_o1
(
[key] int
,[state] varchar(100)
,[From] date
,[Until] date
)
insert into myTable_o1 values (100, 'open', '2021-01-01','2021-01-01')
insert into myTable_o1 values (100, 'open', '2021-01-02','2021-01-02')
insert into myTable_o1 values (100, 'closed', '2021-01-03','2021-01-13')
insert into myTable_o1 values (100, 'open', '2021-01-4','2021-01-20')
insert into myTable_o1 values (100, 'open', '2021-01-20','2021-01-30')
SELECT
[key]
,[state]
,[From]
,[until]
FROM
(
Select
[key]
, [state]
, [From]
, row_number() over (partition by tiles order by [key]) row_num
, ISNULL(Lead(Until) over (partition by tiles order by [key]) , Until) [until]
FROM
(
SELECT * ,
Ntile(2) over ( order by [Key]) as [tiles]
from myTable_o1
) AS A
) AS B WHERE B.row_num in (1,3)
我正在开发 SQL Server 2017 (v14.0)。
我有一个 table 这样的:
Key | State | from | until |
----+----------+------------+------------+
100 | open | 01.01.2021 | 01.01.2021 |
100 | open | 02.01.2021 | 02.01.2021 |
100 | closed | 03.01.2021 | 13.01.2021 |
100 | open | 14.01.2021 | 20.01.2021 |
100 | open | 20.01.2021 | 30.01.2021 |
我想按 Key
和 State
对其进行分组,但仅限于连续行。
所以我的预期结果是这样的:
Key | State | from | until |
----+----------+------------+------------+
100 | open | 01.01.2021 | 02.01.2021 |
100 | closed | 03.01.2021 | 13.01.2021 |
100 | open | 14.01.2021 | 30.01.2021 |
知道怎么做吗?我有一种强烈的感觉,在 ROW_NUMBER
的帮助下这应该是可能的,但我还没有弄清楚......
(在这个示例数据中,一些奇怪的 group by calendarweek
或类似的东西可能是可能的,但这不是我的意图)
这是一个缺口和孤岛问题。一种解决方案是:
WITH cte1 AS (
SELECT *, CASE WHEN LAG([state]) OVER (PARTITION BY [key] ORDER BY [from]) = [state] THEN 0 ELSE 1 END AS chg
FROM t
), cte2 AS (
SELECT *, SUM(chg) OVER (PARTITION BY [key] ORDER BY [from]) AS grp
FROM cte1
)
SELECT [key], grp, MIN([state]), MIN([from]), MAX([until])
FROM cte2
GROUP BY [key], grp
ORDER BY [key], grp
一种可能如下补充:
Create table myTable_o1
(
[key] int
,[state] varchar(100)
,[From] date
,[Until] date
)
insert into myTable_o1 values (100, 'open', '2021-01-01','2021-01-01')
insert into myTable_o1 values (100, 'open', '2021-01-02','2021-01-02')
insert into myTable_o1 values (100, 'closed', '2021-01-03','2021-01-13')
insert into myTable_o1 values (100, 'open', '2021-01-4','2021-01-20')
insert into myTable_o1 values (100, 'open', '2021-01-20','2021-01-30')
SELECT
[key]
,[state]
,[From]
,[until]
FROM
(
Select
[key]
, [state]
, [From]
, row_number() over (partition by tiles order by [key]) row_num
, ISNULL(Lead(Until) over (partition by tiles order by [key]) , Until) [until]
FROM
(
SELECT * ,
Ntile(2) over ( order by [Key]) as [tiles]
from myTable_o1
) AS A
) AS B WHERE B.row_num in (1,3)