将多个期间分组为一个期间
Grouping multiple periods into one period
我有一个时间范围列表。每条记录都有一个 ID,以及开始和结束的日期/时间。
其中有些记录是首尾相接的,即一条记录的结束时间恰好是下一条记录的开始时间。我需要把这些记录连接起来,合并成一个更长的期间。
我尝试了 Lag/Lead 函数,尝试按初始 ID 分组,但没有成功。
这是数据的截图。 "IsInAGroup" 是我使用 lag/lead 派生的列...
-- Lists the periods of REG_NUMBER 'ABC123' whose PeriodStart is on or after
-- '6/1/2018' and flags each one that is back-to-back with a neighbour
-- (neighbours are taken in PeriodStart order within the same REG_NUMBER):
--   IsInAGroup = 1    the next period starts exactly where this one ends, or
--                     the previous period ends exactly where this one starts
--   IsInAGroup = NULL the period touches neither neighbour
-- Both directions are tested so that the last period of a run is flagged too.
-- NOTE: WHERE is evaluated before the window functions, so a period that
-- starts before the cut-off is invisible to LAG/LEAD in this query.
-- NOTE(review): '6/1/2018' is parsed according to the session language /
-- DATEFORMAT setting; if 1 June 2018 is meant, '20180601' is the unambiguous
-- literal -- confirm before changing.
SELECT
    ID,
    PeriodStart,
    PeriodEnd,
    CASE
        WHEN LEAD(PeriodStart) OVER (PARTITION BY REG_NUMBER ORDER BY PeriodStart) = PeriodEnd THEN 1
        WHEN LAG(PeriodEnd) OVER (PARTITION BY REG_NUMBER ORDER BY PeriodStart) = PeriodStart THEN 1
        ELSE NULL
    END AS "IsInAGroup"
FROM #tmpACTIVITIES
WHERE REG_NUMBER = 'ABC123'
  AND PeriodStart >= '6/1/2018'
ORDER BY PeriodStart;
这样是否可行?也就是说,只要存在另一条记录,其结束时间等于当前记录的开始时间,或其开始时间等于当前记录的结束时间,就将当前记录的 IsInAGroup 设置为 1:
-- Sets IsInAGroup = 1 on every row of myTable that touches some other row:
-- the other row ends exactly where this row starts, or starts exactly where
-- this row ends. Rows without such a neighbour are left untouched.
UPDATE cur
SET IsInAGroup = 1
FROM myTable AS cur
WHERE EXISTS
(
    SELECT 1
    FROM myTable AS other
    WHERE other.Id <> cur.Id                        -- never match a row against itself
      AND (   other.PeriodEnd = cur.PeriodStart     -- neighbour directly before this row
           OR other.PeriodStart = cur.PeriodEnd)    -- neighbour directly after this row
);
更新
根据评论,如果您希望把一组首尾相接的记录"压缩"(squash)合并为一条记录,可以尝试使用递归 CTE。
-- Collapses chains of back-to-back periods in myTable into one row per chain.
-- A chain is a run of rows in which each row's PeriodStart equals the previous
-- row's PeriodEnd. Each result row carries the Id and PeriodStart of the first
-- row of a chain and the PeriodEnd of the last row reached from it.
-- NB: assumes that a single record cannot have its PeriodStart = its own PeriodEnd.
-- NOTE: SQL Server stops a recursive CTE after 100 recursion levels by default;
-- append OPTION (MAXRECURSION n) to the final SELECT if chains can be longer.
with cte as
(
    -- Anchor: rows that do not immediately follow another row, i.e. the first
    -- row of every chain (including chains of a single row).
    -- NOT EXISTS is used rather than NOT IN: NOT IN yields no rows at all as
    -- soon as one PeriodEnd in the table is NULL (e.g. an open-ended period).
    -- A row with a NULL PeriodStart matches nothing and is returned as its own chain.
    select head.Id, head.PeriodStart, head.PeriodEnd, 1 as Iteration
    from myTable head
    where not exists
    (
        select 1
        from myTable prev
        where prev.PeriodEnd = head.PeriodStart
    )
    union all
    -- Recursive step: append the row whose start matches the chain's current end.
    -- Keep the chain's original Id and PeriodStart, take the new row's PeriodEnd,
    -- and add 1 to Iteration on each recursion.
    select cte.Id, cte.PeriodStart, mt.PeriodEnd, cte.Iteration + 1
    from cte
    inner join myTable mt on mt.PeriodStart = cte.PeriodEnd
)
, cte2 as
(
    -- Number the rows of each chain from the deepest iteration backwards, so the
    -- row holding the chain's final PeriodEnd gets r = 1.
    select Id, PeriodStart, PeriodEnd, row_number() over (partition by Id order by Iteration desc) r
    from cte
)
-- Keep only the deepest row per chain: the widest start-to-end range for each Id.
select Id, PeriodStart, PeriodEnd
from cte2
where r = 1;
希望代码中的注释已经解释清楚了其中的逻辑;如果还需要进一步说明,请留言评论。
-- Collapses chains of back-to-back activities in #tmpACTIVITIES into one row
-- per chain, separately for each REG_NUMBER. Two rows are linked when they share
-- a REG_NUMBER and one row's START_TIME equals the other's END_TIME. Each result
-- row carries the ID and START_TIME of the first row of a chain and the END_TIME
-- of the last row reached from it.
-- NB: assumes that a single record cannot have its START_TIME = its own END_TIME.
-- NOTE: SQL Server stops a recursive CTE after 100 recursion levels by default;
-- append OPTION (MAXRECURSION n) to the final SELECT if chains can be longer.
WITH chain AS
(
    -- Anchor: rows with no predecessor in the same REG_NUMBER (anti-join), i.e.
    -- the first row of every chain, including chains of a single row.
    SELECT
        first_rec.ID,
        first_rec.START_TIME,
        first_rec.END_TIME,
        1 AS Depth,
        first_rec.REG_NUMBER
    FROM #tmpACTIVITIES AS first_rec
    LEFT JOIN #tmpACTIVITIES AS prev_rec
        ON  prev_rec.REG_NUMBER = first_rec.REG_NUMBER
        AND prev_rec.END_TIME = first_rec.START_TIME
    WHERE prev_rec.ID IS NULL
    UNION ALL
    -- Recursive step: extend the chain with the row that starts where it currently
    -- ends. The chain keeps its original ID and START_TIME, takes the new row's
    -- END_TIME, and Depth grows by 1.
    SELECT
        chain.ID,
        chain.START_TIME,
        next_rec.END_TIME,
        chain.Depth + 1,
        chain.REG_NUMBER
    FROM chain
    INNER JOIN #tmpACTIVITIES AS next_rec
        ON  next_rec.REG_NUMBER = chain.REG_NUMBER
        AND next_rec.START_TIME = chain.END_TIME
),
ranked AS
(
    -- Rank the rows of each chain from the deepest recursion level backwards, so
    -- the row holding the chain's final END_TIME gets rn = 1.
    SELECT
        ID,
        START_TIME,
        END_TIME,
        REG_NUMBER,
        ROW_NUMBER() OVER (PARTITION BY REG_NUMBER, ID ORDER BY Depth DESC) AS rn
    FROM chain
)
-- Keep only the deepest row per chain: the widest start-to-end range for each ID.
SELECT
    ID,
    START_TIME,
    END_TIME,
    REG_NUMBER
FROM ranked
WHERE rn = 1;
我有一个时间范围列表。每条记录都有一个 ID,以及开始和结束的日期/时间。
其中有些记录是首尾相接的,即一条记录的结束时间恰好是下一条记录的开始时间。我需要把这些记录连接起来,合并成一个更长的期间。
我尝试了 Lag/Lead 函数,尝试按初始 ID 分组,但没有成功。
这是数据的截图。 "IsInAGroup" 是我使用 lag/lead 派生的列...
-- Lists the periods of REG_NUMBER 'ABC123' whose PeriodStart is on or after
-- '6/1/2018' and flags each one that is back-to-back with a neighbour
-- (neighbours are taken in PeriodStart order within the same REG_NUMBER):
--   IsInAGroup = 1    the next period starts exactly where this one ends, or
--                     the previous period ends exactly where this one starts
--   IsInAGroup = NULL the period touches neither neighbour
-- Both directions are tested so that the last period of a run is flagged too.
-- The statement is kept on multiple lines: a "--" comment runs to the end of
-- the line, so it must never sit in the middle of a single-line statement.
-- NOTE: WHERE is evaluated before the window functions, so a period that
-- starts before the cut-off is invisible to LAG/LEAD in this query.
-- NOTE(review): '6/1/2018' is parsed according to the session language /
-- DATEFORMAT setting; if 1 June 2018 is meant, '20180601' is the unambiguous
-- literal -- confirm before changing.
SELECT
    ID,
    PeriodStart,
    PeriodEnd,
    CASE
        WHEN LEAD(PeriodStart) OVER (PARTITION BY REG_NUMBER ORDER BY PeriodStart) = PeriodEnd THEN 1
        WHEN LAG(PeriodEnd) OVER (PARTITION BY REG_NUMBER ORDER BY PeriodStart) = PeriodStart THEN 1
        ELSE NULL
    END AS "IsInAGroup"
FROM #tmpACTIVITIES
WHERE REG_NUMBER = 'ABC123'
  AND PeriodStart >= '6/1/2018'
ORDER BY PeriodStart;
这样是否可行?也就是说,只要存在另一条记录,其结束时间等于当前记录的开始时间,或其开始时间等于当前记录的结束时间,就将当前记录的 IsInAGroup 设置为 1:
-- Sets IsInAGroup = 1 on every row of myTable that touches some other row:
-- the other row ends exactly where this row starts, or starts exactly where
-- this row ends. Rows without such a neighbour are left untouched.
UPDATE cur
SET IsInAGroup = 1
FROM myTable AS cur
WHERE EXISTS
(
    SELECT 1
    FROM myTable AS other
    WHERE other.Id <> cur.Id                        -- never match a row against itself
      AND (   other.PeriodEnd = cur.PeriodStart     -- neighbour directly before this row
           OR other.PeriodStart = cur.PeriodEnd)    -- neighbour directly after this row
);
更新
根据评论,如果您希望把一组首尾相接的记录"压缩"(squash)合并为一条记录,可以尝试使用递归 CTE。
-- Collapses chains of back-to-back periods in myTable into one row per chain.
-- A chain is a run of rows in which each row's PeriodStart equals the previous
-- row's PeriodEnd. Each result row carries the Id and PeriodStart of the first
-- row of a chain and the PeriodEnd of the last row reached from it.
-- NB: assumes that a single record cannot have its PeriodStart = its own PeriodEnd.
-- NOTE: SQL Server stops a recursive CTE after 100 recursion levels by default;
-- append OPTION (MAXRECURSION n) to the final SELECT if chains can be longer.
with cte as
(
    -- Anchor: rows that do not immediately follow another row, i.e. the first
    -- row of every chain (including chains of a single row).
    -- NOT EXISTS is used rather than NOT IN: NOT IN yields no rows at all as
    -- soon as one PeriodEnd in the table is NULL (e.g. an open-ended period).
    -- A row with a NULL PeriodStart matches nothing and is returned as its own chain.
    select head.Id, head.PeriodStart, head.PeriodEnd, 1 as Iteration
    from myTable head
    where not exists
    (
        select 1
        from myTable prev
        where prev.PeriodEnd = head.PeriodStart
    )
    union all
    -- Recursive step: append the row whose start matches the chain's current end.
    -- Keep the chain's original Id and PeriodStart, take the new row's PeriodEnd,
    -- and add 1 to Iteration on each recursion.
    select cte.Id, cte.PeriodStart, mt.PeriodEnd, cte.Iteration + 1
    from cte
    inner join myTable mt on mt.PeriodStart = cte.PeriodEnd
)
, cte2 as
(
    -- Number the rows of each chain from the deepest iteration backwards, so the
    -- row holding the chain's final PeriodEnd gets r = 1.
    select Id, PeriodStart, PeriodEnd, row_number() over (partition by Id order by Iteration desc) r
    from cte
)
-- Keep only the deepest row per chain: the widest start-to-end range for each Id.
select Id, PeriodStart, PeriodEnd
from cte2
where r = 1;
希望代码中的注释已经解释清楚了其中的逻辑;如果还需要进一步说明,请留言评论。
-- Collapses chains of back-to-back activities in #tmpACTIVITIES into one row
-- per chain, separately for each REG_NUMBER. Two rows are linked when they share
-- a REG_NUMBER and one row's START_TIME equals the other's END_TIME. Each result
-- row carries the ID and START_TIME of the first row of a chain and the END_TIME
-- of the last row reached from it.
-- NB: assumes that a single record cannot have its START_TIME = its own END_TIME.
-- NOTE: SQL Server stops a recursive CTE after 100 recursion levels by default;
-- append OPTION (MAXRECURSION n) to the final SELECT if chains can be longer.
WITH chain AS
(
    -- Anchor: rows with no predecessor in the same REG_NUMBER (anti-join), i.e.
    -- the first row of every chain, including chains of a single row.
    SELECT
        first_rec.ID,
        first_rec.START_TIME,
        first_rec.END_TIME,
        1 AS Depth,
        first_rec.REG_NUMBER
    FROM #tmpACTIVITIES AS first_rec
    LEFT JOIN #tmpACTIVITIES AS prev_rec
        ON  prev_rec.REG_NUMBER = first_rec.REG_NUMBER
        AND prev_rec.END_TIME = first_rec.START_TIME
    WHERE prev_rec.ID IS NULL
    UNION ALL
    -- Recursive step: extend the chain with the row that starts where it currently
    -- ends. The chain keeps its original ID and START_TIME, takes the new row's
    -- END_TIME, and Depth grows by 1.
    SELECT
        chain.ID,
        chain.START_TIME,
        next_rec.END_TIME,
        chain.Depth + 1,
        chain.REG_NUMBER
    FROM chain
    INNER JOIN #tmpACTIVITIES AS next_rec
        ON  next_rec.REG_NUMBER = chain.REG_NUMBER
        AND next_rec.START_TIME = chain.END_TIME
),
ranked AS
(
    -- Rank the rows of each chain from the deepest recursion level backwards, so
    -- the row holding the chain's final END_TIME gets rn = 1.
    SELECT
        ID,
        START_TIME,
        END_TIME,
        REG_NUMBER,
        ROW_NUMBER() OVER (PARTITION BY REG_NUMBER, ID ORDER BY Depth DESC) AS rn
    FROM chain
)
-- Keep only the deepest row per chain: the widest start-to-end range for each ID.
SELECT
    ID,
    START_TIME,
    END_TIME,
    REG_NUMBER
FROM ranked
WHERE rn = 1;