行合并,周期日期
Row lumping, cycle dates
我想查看 lead
类型,如果该类型与该行相同,则合并这些日期以适合一行。
我有以下 table:
id start_dt end_dt type
1 1/1/19 2/21/19 cross
1 2/22/19 6/5/19 cross
1 6/6/19 8/31/19 cross
1 9/1/19 10/3/19 AAAA
1 10/4/19 10/4/19 cross
1 10/5/19 10/6/19 AAAA
1 10/7/19 10/10/19 AAAA
1 10/11/19 12/31/99 cross
预期结果:
id start_dt end_dt type
1 1/1/19 8/31/19 cross
1 9/1/19 10/3/19 AAAA
1 10/4/19 10/4/19 cross
1 10/5/19 10/10/19 AAAA
1 10/11/19 12/31/99 cross
我怎样才能让我的输出看起来像预期的结果?
我已经用lead
lag
rank
和 case expression
进行了测试,但没有什么值得在这里添加的。我走的路对吗?
这是一个 gaps-and-islands
问题。通过 row_number()
分析函数的贡献来解决它的一种选择:
select min(start_dt) as startdate, max(end_dt) as enddate, type
from
(
with t(id, start_dt, end_dt,type) as
(
select 1, date'2019-01-01', date'2019-02-21', 'cross' from dual union all
select 1, date'2019-02-22', date'2019-06-05', 'cross' from dual union all
select 1, date'2019-06-06', date'2019-08-31', 'cross' from dual union all
select 1, date'2019-09-01', date'2019-10-03', 'AAAA' from dual union all
select 1, date'2019-09-04', date'2019-10-04', 'cross' from dual union all
select 1, date'2019-10-05', date'2019-10-06', 'AAAA' from dual union all
select 1, date'2019-10-07', date'2019-10-10', 'AAAA' from dual union all
select 1, date'2019-10-11', date'2019-12-31', 'cross' from dual
)
select type,
row_number() over (partition by id, type order by end_dt) as rn1,
row_number() over (partition by id order by end_dt) as rn2,
start_dt, end_dt
from t
) tt
group by type, rn1 - rn2
order by enddate;
STARTDATE ENDDATE TYPE
--------- --------- -----
01-JAN-19 31-AUG-19 cross
01-SEP-19 03-OCT-19 AAAA
04-SEP-19 04-OCT-19 cross
05-OCT-19 10-OCT-19 AAAA
11-OCT-19 31-DEC-19 cross
实际上我认为这是 Oracle 模式匹配功能的一个很好的案例。
with t(id, start_dt, end_dt,type) as
(
select 1, date'2019-01-01', date'2019-02-21', 'cross' from dual union all
select 1, date'2019-02-22', date'2019-06-05', 'cross' from dual union all
select 1, date'2019-06-06', date'2019-08-31', 'cross' from dual union all
select 1, date'2019-09-01', date'2019-10-03', 'AAAA' from dual union all
select 1, date'2019-09-04', date'2019-10-04', 'cross' from dual union all
select 1, date'2019-10-05', date'2019-10-06', 'AAAA' from dual union all
select 1, date'2019-10-07', date'2019-10-10', 'AAAA' from dual union all
select 1, date'2019-10-11', date'2019-12-31', 'cross' from dual
)
SELECT *
FROM t
MATCH_RECOGNIZE(ORDER BY start_dt
MEASURES a.id AS ID,
A.start_dt AS START_DT,
NVL(LAST(B.end_dt), A.end_dt) AS END_DT,
a.type AS TYPE
PATTERN (A B*)
DEFINE B AS start_dt > PREV(start_dt) AND type = PREV(type));
可以找到有关该主题的详细入门资料here
如果您想查看 相邻 行来查找可以组合的组,那么我建议 lag()
来查找组的起始位置和累积总和:
select id, type, min(start_dt), max(end_dt)
from (select t.*,
sum(case when prev_end_dt >= start_dt - 1 then 0 else 1 end) over (partition by id, type order by start_dt) as grp
from (select t.*,
lag(end_dt) over (partition by id, type order by start_dt) as prev_end_dt
from t
) t
) t
group by id, type, grp
order by id, min(start_dt);
特别是,这会发现 type
没有变化但时间范围存在差距的情况,如 this db<>fiddle for id = 2.
所示
我想查看 lead
类型,如果该类型与该行相同,则合并这些日期以适合一行。
我有以下 table:
id start_dt end_dt type
1 1/1/19 2/21/19 cross
1 2/22/19 6/5/19 cross
1 6/6/19 8/31/19 cross
1 9/1/19 10/3/19 AAAA
1 10/4/19 10/4/19 cross
1 10/5/19 10/6/19 AAAA
1 10/7/19 10/10/19 AAAA
1 10/11/19 12/31/99 cross
预期结果:
id start_dt end_dt type
1 1/1/19 8/31/19 cross
1 9/1/19 10/3/19 AAAA
1 10/4/19 10/4/19 cross
1 10/5/19 10/10/19 AAAA
1 10/11/19 12/31/99 cross
我怎样才能让我的输出看起来像预期的结果?
我已经用lead
lag
rank
和 case expression
进行了测试,但没有什么值得在这里添加的。我走的路对吗?
这是一个 gaps-and-islands
问题。通过 row_number()
分析函数的贡献来解决它的一种选择:
select min(start_dt) as startdate, max(end_dt) as enddate, type
from
(
with t(id, start_dt, end_dt,type) as
(
select 1, date'2019-01-01', date'2019-02-21', 'cross' from dual union all
select 1, date'2019-02-22', date'2019-06-05', 'cross' from dual union all
select 1, date'2019-06-06', date'2019-08-31', 'cross' from dual union all
select 1, date'2019-09-01', date'2019-10-03', 'AAAA' from dual union all
select 1, date'2019-09-04', date'2019-10-04', 'cross' from dual union all
select 1, date'2019-10-05', date'2019-10-06', 'AAAA' from dual union all
select 1, date'2019-10-07', date'2019-10-10', 'AAAA' from dual union all
select 1, date'2019-10-11', date'2019-12-31', 'cross' from dual
)
select type,
row_number() over (partition by id, type order by end_dt) as rn1,
row_number() over (partition by id order by end_dt) as rn2,
start_dt, end_dt
from t
) tt
group by type, rn1 - rn2
order by enddate;
STARTDATE ENDDATE TYPE
--------- --------- -----
01-JAN-19 31-AUG-19 cross
01-SEP-19 03-OCT-19 AAAA
04-SEP-19 04-OCT-19 cross
05-OCT-19 10-OCT-19 AAAA
11-OCT-19 31-DEC-19 cross
实际上我认为这是 Oracle 模式匹配功能的一个很好的案例。
with t(id, start_dt, end_dt,type) as
(
select 1, date'2019-01-01', date'2019-02-21', 'cross' from dual union all
select 1, date'2019-02-22', date'2019-06-05', 'cross' from dual union all
select 1, date'2019-06-06', date'2019-08-31', 'cross' from dual union all
select 1, date'2019-09-01', date'2019-10-03', 'AAAA' from dual union all
select 1, date'2019-09-04', date'2019-10-04', 'cross' from dual union all
select 1, date'2019-10-05', date'2019-10-06', 'AAAA' from dual union all
select 1, date'2019-10-07', date'2019-10-10', 'AAAA' from dual union all
select 1, date'2019-10-11', date'2019-12-31', 'cross' from dual
)
SELECT *
FROM t
MATCH_RECOGNIZE(ORDER BY start_dt
MEASURES a.id AS ID,
A.start_dt AS START_DT,
NVL(LAST(B.end_dt), A.end_dt) AS END_DT,
a.type AS TYPE
PATTERN (A B*)
DEFINE B AS start_dt > PREV(start_dt) AND type = PREV(type));
可以找到有关该主题的详细入门资料here
如果您想查看 相邻 行来查找可以组合的组,那么我建议 lag()
来查找组的起始位置和累积总和:
select id, type, min(start_dt), max(end_dt)
from (select t.*,
sum(case when prev_end_dt >= start_dt - 1 then 0 else 1 end) over (partition by id, type order by start_dt) as grp
from (select t.*,
lag(end_dt) over (partition by id, type order by start_dt) as prev_end_dt
from t
) t
) t
group by id, type, grp
order by id, min(start_dt);
特别是,这会发现 type
没有变化但时间范围存在差距的情况,如 this db<>fiddle for id = 2.