Group by Clause 仅针对特定记录
Group by Clause for specific records alone
我打算按客户把开始日期和结束日期“拉平”(合并),如下所示。如果日期范围是连续的(上一条记录的结束日期等于下一条记录的开始日期),就把这些记录合并;否则保持原样。
例如:
Input
Customer START END
A 2000 2001
A 2001 2007
A 2009 2010
A 2011 2015
Expected Output
A 2000 2007
A 2009 2010
A 2011 2015
使用分析函数,我能够用连续日期标记记录:
-- Flag each row that touches a neighbouring range of the same customer.
-- CONT_FLG = 1 means the row is continuous with the next or the previous row;
-- CONT_FLG = 0 means it stands alone.
-- Fixes: the alias A is now declared, and the LAG reads END (the original
-- referenced END_DT once while using END everywhere else).
-- NOTE(review): START and END are keywords in some engines and may need
-- quoting or renaming -- confirm against the target database.
SELECT a.*,
       CASE
           -- the next range starts exactly where this one ends ...
           WHEN LEAD(start) OVER (PARTITION BY customer ORDER BY start, end) = end
           -- ... or the previous range ends exactly where this one starts
             OR LAG(end) OVER (PARTITION BY customer ORDER BY start, end) = start
           THEN 1
           ELSE 0
       END AS cont_flg
FROM customer a  -- the question's "TABLE CUSTOMER" placeholder
Customer START END CONT_FLG
A 2000 2001 1
A 2001 2007 1
A 2009 2010 0
A 2011 2015 0
但我无法继续按客户对 min (START) 和 max (END) 进行分组,因为它还会合并非连续值。有什么好的建议
试试
-- Collapse touching ranges per customer: one output row per unbroken chain,
-- running from the chain's earliest START to its latest END.
-- NOTE(review): START and END are keywords in some engines and may need
-- quoting or renaming -- confirm against the target database.
SELECT customer,
       MIN(start),
       MAX(end)  -- was MIN(end), which returned the first link's end (2001) instead of the chain's end (2007)
FROM (
    SELECT flagged.*,
           -- running count of chain openers = chain number within the customer
           SUM(new_chain_flg) OVER (PARTITION BY customer ORDER BY start, end) AS chain_no
    FROM (
        SELECT a.*,
               -- 0 when this row begins exactly where the previous one ended,
               -- 1 when it opens a new chain (including the customer's first row)
               CASE
                   WHEN LAG(end) OVER (PARTITION BY customer ORDER BY start, end) = start THEN 0
                   ELSE 1
               END AS new_chain_flg
        FROM customer a  -- the question's "TABLE CUSTOMER" placeholder
    ) flagged
) chained
-- Grouping by the chain number keeps separate chains apart; grouping by a 0/1
-- continuity flag would lump every flagged row of a customer into one group.
GROUP BY customer, chain_no
如果您捕获实际的 lead/lag 日期而不是 0/1,那么您会得到类似这样的结果:
-- Per customer, in start_dt order, keep only the boundary dates of each run:
--   adj_start_dt is NULL when the row starts exactly where the previous row ended;
--   adj_end_dt   is NULL when the row ends exactly where the next row starts.
SELECT
    t.*,
    NULLIF(
        t.start_dt,
        LAG(t.end_dt) OVER (PARTITION BY t.customer ORDER BY t.start_dt)
    ) AS adj_start_dt,
    NULLIF(
        t.end_dt,
        LEAD(t.start_dt) OVER (PARTITION BY t.customer ORDER BY t.start_dt)
    ) AS adj_end_dt
FROM t42 t
ORDER BY
    t.customer,
    t.start_dt;
CUSTOMER   START_DT     END_DT ADJ_START_DT ADJ_END_DT
-------- ---------- ---------- ------------ ----------
A              2000       2001         2000
A              2001       2003
A              2003       2007                    2007
A              2009       2010         2009       2010
A              2011       2015         2011       2015
为了演示效果,我把您的第二条记录拆成了两条相邻的记录,因此这里有一行的两个调整后日期(adj_start_dt 和 adj_end_dt)都为空。接下来可以去掉这些两者都为空的行,因为它们对应的是完全落在某个连续范围内部的记录;剩下的行就包含了每个时期的开始日期和结束日期:
-- Step 1 (flagged): blank out start/end dates that merely join two adjacent rows.
-- Step 2: drop rows where both dates were blanked -- they lie strictly inside a
-- run -- leaving only the rows that carry a run's first start or last end.
WITH flagged AS (
    SELECT
        t.*,
        NULLIF(
            t.start_dt,
            LAG(t.end_dt) OVER (PARTITION BY t.customer ORDER BY t.start_dt)
        ) AS adj_start_dt,
        NULLIF(
            t.end_dt,
            LEAD(t.start_dt) OVER (PARTITION BY t.customer ORDER BY t.start_dt)
        ) AS adj_end_dt
    FROM t42 t
)
SELECT *
FROM flagged
WHERE NOT (adj_start_dt IS NULL AND adj_end_dt IS NULL)
ORDER BY
    customer,
    start_dt;
CUSTOMER   START_DT     END_DT ADJ_START_DT ADJ_END_DT
-------- ---------- ---------- ------------ ----------
A              2000       2001         2000
A              2003       2007                    2007
A              2009       2010         2009       2010
A              2011       2015         2011       2015
然后您可以折叠那些带有空值的行,因为相邻的行(lead/lag)现在是相关的:
-- flagged:    blank out start/end dates that merely join two adjacent rows.
-- boundaries: keep only rows that still carry a run's start and/or end date.
-- Final step: once interior rows are gone, a row missing its start borrows it
-- from the previous boundary row and a row missing its end borrows it from the
-- next one; DISTINCT then folds the two halves of each run into a single row.
WITH flagged AS (
    SELECT
        t.*,
        NULLIF(
            t.start_dt,
            LAG(t.end_dt) OVER (PARTITION BY t.customer ORDER BY t.start_dt)
        ) AS adj_start_dt,
        NULLIF(
            t.end_dt,
            LEAD(t.start_dt) OVER (PARTITION BY t.customer ORDER BY t.start_dt)
        ) AS adj_end_dt
    FROM t42 t
),
boundaries AS (
    SELECT *
    FROM flagged
    WHERE NOT (adj_start_dt IS NULL AND adj_end_dt IS NULL)
)
SELECT DISTINCT
    customer,
    COALESCE(
        adj_start_dt,
        LAG(adj_start_dt) OVER (PARTITION BY customer ORDER BY start_dt)
    ) AS grp_start_dt,
    COALESCE(
        adj_end_dt,
        LEAD(adj_end_dt) OVER (PARTITION BY customer ORDER BY start_dt)
    ) AS grp_end_dt
FROM boundaries
ORDER BY
    customer,
    grp_start_dt;
CUSTOMER GRP_START_DT GRP_END_DT
-------- ------------ ----------
A 2000 2007
A 2009 2010
A 2011 2015
我打算按客户把开始日期和结束日期“拉平”(合并),如下所示。如果日期范围是连续的(上一条记录的结束日期等于下一条记录的开始日期),就把这些记录合并;否则保持原样。例如:
Input
Customer START END
A 2000 2001
A 2001 2007
A 2009 2010
A 2011 2015
Expected Output
A 2000 2007
A 2009 2010
A 2011 2015
使用分析函数,我能够用连续日期标记记录:
-- Flag each row that touches a neighbouring range of the same customer.
-- CONT_FLG = 1 means the row is continuous with the next or the previous row;
-- CONT_FLG = 0 means it stands alone.
-- Fixes: the alias A is now declared, and the LAG reads END (the original
-- referenced END_DT once while using END everywhere else).
-- NOTE(review): START and END are keywords in some engines and may need
-- quoting or renaming -- confirm against the target database.
SELECT a.*,
       CASE
           -- the next range starts exactly where this one ends ...
           WHEN LEAD(start) OVER (PARTITION BY customer ORDER BY start, end) = end
           -- ... or the previous range ends exactly where this one starts
             OR LAG(end) OVER (PARTITION BY customer ORDER BY start, end) = start
           THEN 1
           ELSE 0
       END AS cont_flg
FROM customer a  -- the question's "TABLE CUSTOMER" placeholder
Customer START END CONT_FLG
A 2000 2001 1
A 2001 2007 1
A 2009 2010 0
A 2011 2015 0
但我无法继续按客户对 min (START) 和 max (END) 进行分组,因为它还会合并非连续值。有什么好的建议
试试
-- Collapse touching ranges per customer: one output row per unbroken chain,
-- running from the chain's earliest START to its latest END.
-- NOTE(review): START and END are keywords in some engines and may need
-- quoting or renaming -- confirm against the target database.
SELECT customer,
       MIN(start),
       MAX(end)  -- was MIN(end), which returned the first link's end (2001) instead of the chain's end (2007)
FROM (
    SELECT flagged.*,
           -- running count of chain openers = chain number within the customer
           SUM(new_chain_flg) OVER (PARTITION BY customer ORDER BY start, end) AS chain_no
    FROM (
        SELECT a.*,
               -- 0 when this row begins exactly where the previous one ended,
               -- 1 when it opens a new chain (including the customer's first row)
               CASE
                   WHEN LAG(end) OVER (PARTITION BY customer ORDER BY start, end) = start THEN 0
                   ELSE 1
               END AS new_chain_flg
        FROM customer a  -- the question's "TABLE CUSTOMER" placeholder
    ) flagged
) chained
-- Grouping by the chain number keeps separate chains apart; grouping by a 0/1
-- continuity flag would lump every flagged row of a customer into one group.
GROUP BY customer, chain_no
如果您捕获实际的 lead/lag 日期而不是 0/1,那么您会得到类似这样的结果:
-- Per customer, in start_dt order, keep only the boundary dates of each run:
--   adj_start_dt is NULL when the row starts exactly where the previous row ended;
--   adj_end_dt   is NULL when the row ends exactly where the next row starts.
SELECT
    t.*,
    NULLIF(
        t.start_dt,
        LAG(t.end_dt) OVER (PARTITION BY t.customer ORDER BY t.start_dt)
    ) AS adj_start_dt,
    NULLIF(
        t.end_dt,
        LEAD(t.start_dt) OVER (PARTITION BY t.customer ORDER BY t.start_dt)
    ) AS adj_end_dt
FROM t42 t
ORDER BY
    t.customer,
    t.start_dt;
CUSTOMER   START_DT     END_DT ADJ_START_DT ADJ_END_DT
-------- ---------- ---------- ------------ ----------
A              2000       2001         2000
A              2001       2003
A              2003       2007                    2007
A              2009       2010         2009       2010
A              2011       2015         2011       2015
为了演示效果,我把您的第二条记录拆成了两条相邻的记录,因此这里有一行的两个调整后日期(adj_start_dt 和 adj_end_dt)都为空。接下来可以去掉这些两者都为空的行,因为它们对应的是完全落在某个连续范围内部的记录;剩下的行就包含了每个时期的开始日期和结束日期:
-- Step 1 (flagged): blank out start/end dates that merely join two adjacent rows.
-- Step 2: drop rows where both dates were blanked -- they lie strictly inside a
-- run -- leaving only the rows that carry a run's first start or last end.
WITH flagged AS (
    SELECT
        t.*,
        NULLIF(
            t.start_dt,
            LAG(t.end_dt) OVER (PARTITION BY t.customer ORDER BY t.start_dt)
        ) AS adj_start_dt,
        NULLIF(
            t.end_dt,
            LEAD(t.start_dt) OVER (PARTITION BY t.customer ORDER BY t.start_dt)
        ) AS adj_end_dt
    FROM t42 t
)
SELECT *
FROM flagged
WHERE NOT (adj_start_dt IS NULL AND adj_end_dt IS NULL)
ORDER BY
    customer,
    start_dt;
CUSTOMER   START_DT     END_DT ADJ_START_DT ADJ_END_DT
-------- ---------- ---------- ------------ ----------
A              2000       2001         2000
A              2003       2007                    2007
A              2009       2010         2009       2010
A              2011       2015         2011       2015
然后您可以折叠那些带有空值的行,因为相邻的行(lead/lag)现在是相关的:
-- flagged:    blank out start/end dates that merely join two adjacent rows.
-- boundaries: keep only rows that still carry a run's start and/or end date.
-- Final step: once interior rows are gone, a row missing its start borrows it
-- from the previous boundary row and a row missing its end borrows it from the
-- next one; DISTINCT then folds the two halves of each run into a single row.
WITH flagged AS (
    SELECT
        t.*,
        NULLIF(
            t.start_dt,
            LAG(t.end_dt) OVER (PARTITION BY t.customer ORDER BY t.start_dt)
        ) AS adj_start_dt,
        NULLIF(
            t.end_dt,
            LEAD(t.start_dt) OVER (PARTITION BY t.customer ORDER BY t.start_dt)
        ) AS adj_end_dt
    FROM t42 t
),
boundaries AS (
    SELECT *
    FROM flagged
    WHERE NOT (adj_start_dt IS NULL AND adj_end_dt IS NULL)
)
SELECT DISTINCT
    customer,
    COALESCE(
        adj_start_dt,
        LAG(adj_start_dt) OVER (PARTITION BY customer ORDER BY start_dt)
    ) AS grp_start_dt,
    COALESCE(
        adj_end_dt,
        LEAD(adj_end_dt) OVER (PARTITION BY customer ORDER BY start_dt)
    ) AS grp_end_dt
FROM boundaries
ORDER BY
    customer,
    grp_start_dt;
CUSTOMER GRP_START_DT GRP_END_DT
-------- ------------ ----------
A 2000 2007
A 2009 2010
A 2011 2015