Sum Over 窗口函数的意外结果
Unexpected result from Sum Over windowed function
这让我发疯,非常感谢任何帮助。下面的代码和结果 - 我的问题很简单。结果第4行,Activity不匹配lag_Activity,为什么组号不增加?
create table #exampleTable
(name varchar(20)
,pnum bigint
,activity varchar(10)
,startTime datetime
,endTime datetime)
insert into #exampleTable
values
('Harry Potter',12345678, 'On Shift', '2022-05-18 13:00:00', '2022-05-18 22:00:00')
,('Harry Potter',12345678,'Off Shift','2022-05-18 16:30:00','2022-05-18 17:30:00')
,('Jane Doe',98765432,'Off Shift','2022-05-18 02:00:00','2022-05-18 05:00:00')
,('Jane Doe',98765432,'On Shift','2022-05-18 02:00:00','2022-05-18 16:00:00')
,('Jane Doe',98765432,'Off Shift','2022-05-18 06:15:00','2022-05-18 06:45:00')
,('Jane Doe',98765432,'Off Shift','2022-05-18 11:30:00','2022-05-18 12:00:00')
,('Jane Doe',98765432,'Off Shift','2022-05-18 12:00:00','2022-05-18 15:50:00')
select
sum(case when activity = lag_activity and starttime <= lag_endtime then 0 else 1 end) over(partition by pnum order by pnum, starttime) as grp
,*
from (
select
*
,lag(endtime) over(order by pnum, starttime) lag_endtime
,lag(activity) over(order by pnum, starttime) lag_activity
from #exampleTable
where endtime-Starttime>0
) a
order by pnum, starttime
结果如下:
grp name pnum activity lag_activity startTime endTime lag_endtime
1 Harry Potter 12345678 On Shift NULL 2022-05-18 13:00:00.000 2022-05-18 22:00:00.000 NULL
2 Harry Potter 12345678 Off Shift On Shift 2022-05-18 16:30:00.000 2022-05-18 17:30:00.000 2022-05-18 22:00:00.000
1 Jane Doe 98765432 Off Shift Off Shift 2022-05-18 02:00:00.000 2022-05-18 05:00:00.000 2022-05-18 17:30:00.000
1 Jane Doe 98765432 On Shift Off Shift 2022-05-18 02:00:00.000 2022-05-18 16:00:00.000 2022-05-18 05:00:00.000
2 Jane Doe 98765432 Off Shift On Shift 2022-05-18 06:15:00.000 2022-05-18 06:45:00.000 2022-05-18 16:00:00.000
3 Jane Doe 98765432 Off Shift Off Shift 2022-05-18 11:30:00.000 2022-05-18 12:00:00.000 2022-05-18 06:45:00.000
3 Jane Doe 98765432 Off Shift Off Shift 2022-05-18 12:00:00.000 2022-05-18 15:50:00.000 2022-05-18 12:00:00.000
感谢@ConorCunninghamMSFT 的提示,滞后函数和 Sum Over 函数需要额外排序。顺序本身不如保持一致重要。
select
sum(case when activity = lag_activity and starttime <= lag_endtime then 0 else 1 end) over(partition by pnum order by pnum, starttime, endtime) as grp
,*
from (
select
*
,lag(endtime) over(order by pnum, starttime, endtime) lag_endtime
,lag(activity) over(order by pnum, starttime, endtime) lag_activity
from #exampleTable
where endtime-Starttime>0
) a
order by pnum, starttime, endtime
这让我发疯,非常感谢任何帮助。下面的代码和结果 - 我的问题很简单。结果第4行,Activity不匹配lag_Activity,为什么组号不增加?
create table #exampleTable
(name varchar(20)
,pnum bigint
,activity varchar(10)
,startTime datetime
,endTime datetime)
insert into #exampleTable
values
('Harry Potter',12345678, 'On Shift', '2022-05-18 13:00:00', '2022-05-18 22:00:00')
,('Harry Potter',12345678,'Off Shift','2022-05-18 16:30:00','2022-05-18 17:30:00')
,('Jane Doe',98765432,'Off Shift','2022-05-18 02:00:00','2022-05-18 05:00:00')
,('Jane Doe',98765432,'On Shift','2022-05-18 02:00:00','2022-05-18 16:00:00')
,('Jane Doe',98765432,'Off Shift','2022-05-18 06:15:00','2022-05-18 06:45:00')
,('Jane Doe',98765432,'Off Shift','2022-05-18 11:30:00','2022-05-18 12:00:00')
,('Jane Doe',98765432,'Off Shift','2022-05-18 12:00:00','2022-05-18 15:50:00')
select
sum(case when activity = lag_activity and starttime <= lag_endtime then 0 else 1 end) over(partition by pnum order by pnum, starttime) as grp
,*
from (
select
*
,lag(endtime) over(order by pnum, starttime) lag_endtime
,lag(activity) over(order by pnum, starttime) lag_activity
from #exampleTable
where endtime-Starttime>0
) a
order by pnum, starttime
结果如下:
grp name pnum activity lag_activity startTime endTime lag_endtime
1 Harry Potter 12345678 On Shift NULL 2022-05-18 13:00:00.000 2022-05-18 22:00:00.000 NULL
2 Harry Potter 12345678 Off Shift On Shift 2022-05-18 16:30:00.000 2022-05-18 17:30:00.000 2022-05-18 22:00:00.000
1 Jane Doe 98765432 Off Shift Off Shift 2022-05-18 02:00:00.000 2022-05-18 05:00:00.000 2022-05-18 17:30:00.000
1 Jane Doe 98765432 On Shift Off Shift 2022-05-18 02:00:00.000 2022-05-18 16:00:00.000 2022-05-18 05:00:00.000
2 Jane Doe 98765432 Off Shift On Shift 2022-05-18 06:15:00.000 2022-05-18 06:45:00.000 2022-05-18 16:00:00.000
3 Jane Doe 98765432 Off Shift Off Shift 2022-05-18 11:30:00.000 2022-05-18 12:00:00.000 2022-05-18 06:45:00.000
3 Jane Doe 98765432 Off Shift Off Shift 2022-05-18 12:00:00.000 2022-05-18 15:50:00.000 2022-05-18 12:00:00.000
感谢@ConorCunninghamMSFT 的提示,滞后函数和 Sum Over 函数需要额外排序。顺序本身不如保持一致重要。
select
sum(case when activity = lag_activity and starttime <= lag_endtime then 0 else 1 end) over(partition by pnum order by pnum, starttime, endtime) as grp
,*
from (
select
*
,lag(endtime) over(order by pnum, starttime, endtime) lag_endtime
,lag(activity) over(order by pnum, starttime, endtime) lag_activity
from #exampleTable
where endtime-Starttime>0
) a
order by pnum, starttime, endtime