Oracle 中类似“间隙与孤岛”(gaps and islands)问题的查询
Oracle Sort of gaps and island query
与其写长句子和段落,不如展示数据和我想要实现的目标:
-- Sample price history for the question: one row per validity period
-- (valid_from_dt .. valid_to_dt) of a price / price code for an item.
create table ssb_price (
    itm_no        varchar2(10),
    price         number,
    price_code    varchar2(10),
    valid_from_dt date,
    valid_to_dt   date
);

-- ANSI DATE literals replace strings such as '01-JAN-21', so the inserts no
-- longer rely on implicit string-to-date conversion driven by the session's
-- NLS_DATE_FORMAT / NLS_DATE_LANGUAGE settings.
-- Explicit column lists keep the inserts valid if the table gains or reorders columns.
insert into ssb_price (itm_no, price, price_code, valid_from_dt, valid_to_dt)
values ('A001', 83, 'AB', date '2021-01-01', date '2021-01-05');
insert into ssb_price (itm_no, price, price_code, valid_from_dt, valid_to_dt)
values ('A001', 83, 'AB', date '2021-01-06', date '2021-01-12');
insert into ssb_price (itm_no, price, price_code, valid_from_dt, valid_to_dt)
values ('A001', 98, 'SPQ', date '2021-01-13', date '2021-01-17');
insert into ssb_price (itm_no, price, price_code, valid_from_dt, valid_to_dt)
values ('A001', 83, 'AB', date '2021-01-19', date '2021-01-24');
insert into ssb_price (itm_no, price, price_code, valid_from_dt, valid_to_dt)
values ('A001', 83, 'DE', date '2021-01-25', date '2021-01-30');
insert into ssb_price (itm_no, price, price_code, valid_from_dt, valid_to_dt)
values ('A001', 83, 'DE', date '2021-01-31', date '2021-02-04');
insert into ssb_price (itm_no, price, price_code, valid_from_dt, valid_to_dt)
values ('A001', 77, 'XY', date '2021-02-07', date '2021-02-12');
insert into ssb_price (itm_no, price, price_code, valid_from_dt, valid_to_dt)
values ('A001', 77, 'XY', date '2021-02-15', date '2021-02-20');
insert into ssb_price (itm_no, price, price_code, valid_from_dt, valid_to_dt)
values ('A001', 62, 'SD', date '2021-02-23', date '2021-02-26');
insert into ssb_price (itm_no, price, price_code, valid_from_dt, valid_to_dt)
values ('A001', 59, 'SD', date '2021-02-26', date '2021-03-03');
对于同一个 itm_no 和同一个价格,如果相邻记录的结束日期与下一条记录的开始日期是连续的,那么它们应该被视为同一个区间,并得到该区间的起止日期。对于价格 77,前一条记录的结束日期(2 月 12 日)与下一条记录的开始日期(2 月 15 日)之间有 2 天的间隔(13 日和 14 日),因此它们不是连续的。下面是期望的输出:(从 Excel 截取)
我之前在另一个帖子中问过这个问题,但那个帖子已经沉了,没有得到任何反馈,所以我新开了这个帖子。请告诉我是否应该将此帖与之前的帖子合并。
这基本上是一个“间隙与孤岛”(gaps and islands)问题。但您并不想通过聚合来减少行数,因此最后一步应使用窗口函数(window function)。
在您的数据中,各时间范围整齐地首尾相接。因此可以使用 lag() 和累计求和来定义分组:
-- Gaps-and-islands without collapsing rows: every source row is returned,
-- extended with the start/end of the contiguous period ("island") it belongs to.
--
-- Islands are defined per itm_no and price only. price_code is deliberately not
-- part of the partitions: the requirement is stated per item and price, and the
-- expected output merges the 83/AB row (19-JAN..24-JAN) with the following
-- 83/DE rows into one period (19-JAN..04-FEB).
--
-- Steps, innermost first:
--   1. prev_valid_to_dt: end date of the previous period of the same item/price.
--   2. grp: running count of "island starts" -- a row starts a new island unless
--      it begins exactly one day after the previous period ended (the first row
--      of a partition has a NULL prev_valid_to_dt and therefore starts an island).
--   3. min/max over (itm_no, price, grp) give the island boundaries.
-- Note: p.* also exposes the helper columns prev_valid_to_dt and grp.
select p.*,
       min(valid_from_dt) over (partition by itm_no, price, grp) as new_valid_from_dt,
       max(valid_to_dt) over (partition by itm_no, price, grp) as new_valid_to_dt
from (select p.*,
             sum(case when valid_from_dt = prev_valid_to_dt + interval '1' day then 0 else 1 end) over
                 (partition by itm_no, price order by valid_from_dt) as grp
      from (select p.*,
                   lag(valid_to_dt) over (partition by itm_no, price order by valid_from_dt) as prev_valid_to_dt
            from ssb_price p
           ) p
     ) p
order by itm_no, valid_from_dt;
这里是一个 db<>fiddle 示例。
从 Oracle 12c 开始,您可以使用 MATCH_RECOGNIZE:
-- MATCH_RECOGNIZE variant: number each run of contiguous, same-price periods
-- per item, then report the run's first start date and last end date on every row.
with numbered_islands as (
    select itm_no,
           price,
           price_code,
           valid_from_dt,
           valid_to_dt,
           island_no
    from ssb_price
    match_recognize (
        partition by itm_no
        order by valid_from_dt, valid_to_dt
        measures
            match_number() as island_no
        all rows per match
        -- island_start has no DEFINE entry, so any row can open a match;
        -- island_next rows extend it while the conditions below hold.
        pattern ( island_start island_next* )
        define
            island_next as (
                price = prev( price )
                and valid_from_dt = prev( valid_to_dt ) + 1
            )
    )
)
select itm_no,
       price,
       price_code,
       valid_from_dt,
       valid_to_dt,
       min( valid_from_dt ) over ( partition by itm_no, island_no ) as new_valid_from_dt,
       max( valid_to_dt ) over ( partition by itm_no, island_no ) as new_valid_to_dt
from numbered_islands
另外,从 Oracle 10g 开始,您也可以使用 MODEL 子句:
-- MODEL-clause variant: assigns an island number (mn) to every row of an item
-- and then reports the island's earliest start and latest end date on each row.
SELECT itm_no,
       price,
       price_code,
       valid_from_dt,
       valid_to_dt,
       mn,
       MIN( valid_from_dt ) OVER ( PARTITION BY itm_no, mn ) AS new_valid_from_dt,
       MAX( valid_to_dt ) OVER ( PARTITION BY itm_no, mn ) AS new_valid_to_dt
FROM (
  SELECT *
  FROM (
    SELECT s.*,
           -- valid_to_dt is added as a tie-breaker so that rows sharing a
           -- valid_from_dt are numbered deterministically, using the same
           -- ordering as the MATCH_RECOGNIZE version of this query.
           ROW_NUMBER() OVER ( PARTITION BY itm_no ORDER BY valid_from_dt, valid_to_dt ) AS rn
    FROM ssb_price s
  )
  MODEL
    PARTITION BY ( itm_no )
    DIMENSION BY ( rn )
    -- mn starts at 1 for every row; the rule below rewrites it for rn > 1.
    MEASURES ( price, price_code, valid_from_dt, valid_to_dt, 1 AS mn )
    RULES (
      -- A row keeps the previous row's mn when it starts the day after the
      -- previous row ended and has the same price; otherwise mn is incremented.
      -- NOTE(review): the rule reads mn[rn - 1], so it depends on cells being
      -- evaluated in ascending rn order -- confirm on the target Oracle version.
      mn[rn>1] = mn[cv(rn)-1]
                 +
                 CASE
                 WHEN valid_from_dt[cv(rn)] = valid_to_dt[cv(rn)-1] + 1
                 AND  price[cv(rn)] = price[cv(rn) - 1]
                 THEN 0
                 ELSE 1
                 END
    )
)
其中,对于示例数据:
-- Sample data as a single CREATE TABLE ... AS SELECT.
-- The character columns are cast explicitly so the table matches the question's
-- DDL (VARCHAR2(10)); without the casts the column types would be derived from
-- the string literals instead.
CREATE TABLE ssb_price (itm_no, price, price_code, valid_from_dt, valid_to_dt) AS
SELECT CAST( itm_no AS VARCHAR2(10) ),
       price,
       CAST( price_code AS VARCHAR2(10) ),
       valid_from_dt,
       valid_to_dt
FROM (
  SELECT 'A001' AS itm_no, 83 AS price, 'AB' AS price_code, DATE '2021-01-01' AS valid_from_dt, DATE '2021-01-05' AS valid_to_dt FROM DUAL UNION ALL
  SELECT 'A001', 83, 'AB',  DATE '2021-01-06', DATE '2021-01-12' FROM DUAL UNION ALL
  SELECT 'A001', 98, 'SPQ', DATE '2021-01-13', DATE '2021-01-17' FROM DUAL UNION ALL
  SELECT 'A001', 83, 'AB',  DATE '2021-01-19', DATE '2021-01-24' FROM DUAL UNION ALL
  SELECT 'A001', 83, 'DE',  DATE '2021-01-25', DATE '2021-01-30' FROM DUAL UNION ALL
  SELECT 'A001', 83, 'DE',  DATE '2021-01-31', DATE '2021-02-04' FROM DUAL UNION ALL
  SELECT 'A001', 77, 'XY',  DATE '2021-02-07', DATE '2021-02-12' FROM DUAL UNION ALL
  SELECT 'A001', 77, 'XY',  DATE '2021-02-15', DATE '2021-02-20' FROM DUAL UNION ALL
  SELECT 'A001', 62, 'SD',  DATE '2021-02-23', DATE '2021-02-26' FROM DUAL UNION ALL
  SELECT 'A001', 59, 'SD',  DATE '2021-02-26', DATE '2021-03-03' FROM DUAL
);
输出:
| ITM_NO | PRICE | PRICE_CODE | VALID_FROM_DT | VALID_TO_DT | NEW_VALID_FROM_DT | NEW_VALID_TO_DT |
|--------|-------|------------|---------------|-------------|-------------------|-----------------|
| A001 | 83 | AB | 2021-01-01 00:00:00 | 2021-01-05 00:00:00 | 2021-01-01 00:00:00 | 2021-01-12 00:00:00 |
| A001 | 83 | AB | 2021-01-06 00:00:00 | 2021-01-12 00:00:00 | 2021-01-01 00:00:00 | 2021-01-12 00:00:00 |
| A001 | 98 | SPQ | 2021-01-13 00:00:00 | 2021-01-17 00:00:00 | 2021-01-13 00:00:00 | 2021-01-17 00:00:00 |
| A001 | 83 | AB | 2021-01-19 00:00:00 | 2021-01-24 00:00:00 | 2021-01-19 00:00:00 | 2021-02-04 00:00:00 |
| A001 | 83 | DE | 2021-01-25 00:00:00 | 2021-01-30 00:00:00 | 2021-01-19 00:00:00 | 2021-02-04 00:00:00 |
| A001 | 83 | DE | 2021-01-31 00:00:00 | 2021-02-04 00:00:00 | 2021-01-19 00:00:00 | 2021-02-04 00:00:00 |
| A001 | 77 | XY | 2021-02-07 00:00:00 | 2021-02-12 00:00:00 | 2021-02-07 00:00:00 | 2021-02-12 00:00:00 |
| A001 | 77 | XY | 2021-02-15 00:00:00 | 2021-02-20 00:00:00 | 2021-02-15 00:00:00 | 2021-02-20 00:00:00 |
| A001 | 62 | SD | 2021-02-23 00:00:00 | 2021-02-26 00:00:00 | 2021-02-23 00:00:00 | 2021-02-26 00:00:00 |
| A001 | 59 | SD | 2021-02-26 00:00:00 | 2021-03-03 00:00:00 | 2021-02-26 00:00:00 | 2021-03-03 00:00:00 |
db<>fiddle here
与其写长句子和段落,不如展示数据和我想要实现的目标:
-- Sample price history for the question: one row per validity period
-- (valid_from_dt .. valid_to_dt) of a price / price code for an item.
create table ssb_price (
    itm_no        varchar2(10),
    price         number,
    price_code    varchar2(10),
    valid_from_dt date,
    valid_to_dt   date
);

-- ANSI DATE literals replace strings such as '01-JAN-21', so the inserts no
-- longer rely on implicit string-to-date conversion driven by the session's
-- NLS_DATE_FORMAT / NLS_DATE_LANGUAGE settings.
-- Explicit column lists keep the inserts valid if the table gains or reorders columns.
insert into ssb_price (itm_no, price, price_code, valid_from_dt, valid_to_dt)
values ('A001', 83, 'AB', date '2021-01-01', date '2021-01-05');
insert into ssb_price (itm_no, price, price_code, valid_from_dt, valid_to_dt)
values ('A001', 83, 'AB', date '2021-01-06', date '2021-01-12');
insert into ssb_price (itm_no, price, price_code, valid_from_dt, valid_to_dt)
values ('A001', 98, 'SPQ', date '2021-01-13', date '2021-01-17');
insert into ssb_price (itm_no, price, price_code, valid_from_dt, valid_to_dt)
values ('A001', 83, 'AB', date '2021-01-19', date '2021-01-24');
insert into ssb_price (itm_no, price, price_code, valid_from_dt, valid_to_dt)
values ('A001', 83, 'DE', date '2021-01-25', date '2021-01-30');
insert into ssb_price (itm_no, price, price_code, valid_from_dt, valid_to_dt)
values ('A001', 83, 'DE', date '2021-01-31', date '2021-02-04');
insert into ssb_price (itm_no, price, price_code, valid_from_dt, valid_to_dt)
values ('A001', 77, 'XY', date '2021-02-07', date '2021-02-12');
insert into ssb_price (itm_no, price, price_code, valid_from_dt, valid_to_dt)
values ('A001', 77, 'XY', date '2021-02-15', date '2021-02-20');
insert into ssb_price (itm_no, price, price_code, valid_from_dt, valid_to_dt)
values ('A001', 62, 'SD', date '2021-02-23', date '2021-02-26');
insert into ssb_price (itm_no, price, price_code, valid_from_dt, valid_to_dt)
values ('A001', 59, 'SD', date '2021-02-26', date '2021-03-03');
对于同一个 itm_no 和同一个价格,如果相邻记录的结束日期与下一条记录的开始日期是连续的,那么它们应该被视为同一个区间,并得到该区间的起止日期。对于价格 77,前一条记录的结束日期(2 月 12 日)与下一条记录的开始日期(2 月 15 日)之间有 2 天的间隔(13 日和 14 日),因此它们不是连续的。下面是期望的输出:(从 Excel 截取)
我之前在另一个帖子中问过这个问题,但那个帖子已经沉了,没有得到任何反馈,所以我新开了这个帖子。请告诉我是否应该将此帖与之前的帖子合并。
这基本上是一个“间隙与孤岛”(gaps and islands)问题。但您并不想通过聚合来减少行数,因此最后一步应使用窗口函数(window function)。
在您的数据中,各时间范围整齐地首尾相接。因此可以使用 lag() 和累计求和来定义分组:
-- Gaps-and-islands without collapsing rows: every source row is returned,
-- extended with the start/end of the contiguous period ("island") it belongs to.
--
-- Islands are defined per itm_no and price only. price_code is deliberately not
-- part of the partitions: the requirement is stated per item and price, and the
-- expected output merges the 83/AB row (19-JAN..24-JAN) with the following
-- 83/DE rows into one period (19-JAN..04-FEB).
--
-- Steps, innermost first:
--   1. prev_valid_to_dt: end date of the previous period of the same item/price.
--   2. grp: running count of "island starts" -- a row starts a new island unless
--      it begins exactly one day after the previous period ended (the first row
--      of a partition has a NULL prev_valid_to_dt and therefore starts an island).
--   3. min/max over (itm_no, price, grp) give the island boundaries.
-- Note: p.* also exposes the helper columns prev_valid_to_dt and grp.
select p.*,
       min(valid_from_dt) over (partition by itm_no, price, grp) as new_valid_from_dt,
       max(valid_to_dt) over (partition by itm_no, price, grp) as new_valid_to_dt
from (select p.*,
             sum(case when valid_from_dt = prev_valid_to_dt + interval '1' day then 0 else 1 end) over
                 (partition by itm_no, price order by valid_from_dt) as grp
      from (select p.*,
                   lag(valid_to_dt) over (partition by itm_no, price order by valid_from_dt) as prev_valid_to_dt
            from ssb_price p
           ) p
     ) p
order by itm_no, valid_from_dt;
这里是一个 db<>fiddle 示例。
从 Oracle 12c 开始,您可以使用 MATCH_RECOGNIZE:
-- MATCH_RECOGNIZE variant: number each run of contiguous, same-price periods
-- per item, then report the run's first start date and last end date on every row.
with numbered_islands as (
    select itm_no,
           price,
           price_code,
           valid_from_dt,
           valid_to_dt,
           island_no
    from ssb_price
    match_recognize (
        partition by itm_no
        order by valid_from_dt, valid_to_dt
        measures
            match_number() as island_no
        all rows per match
        -- island_start has no DEFINE entry, so any row can open a match;
        -- island_next rows extend it while the conditions below hold.
        pattern ( island_start island_next* )
        define
            island_next as (
                price = prev( price )
                and valid_from_dt = prev( valid_to_dt ) + 1
            )
    )
)
select itm_no,
       price,
       price_code,
       valid_from_dt,
       valid_to_dt,
       min( valid_from_dt ) over ( partition by itm_no, island_no ) as new_valid_from_dt,
       max( valid_to_dt ) over ( partition by itm_no, island_no ) as new_valid_to_dt
from numbered_islands
另外,从 Oracle 10g 开始,您也可以使用 MODEL 子句:
-- MODEL-clause variant: assigns an island number (mn) to every row of an item
-- and then reports the island's earliest start and latest end date on each row.
SELECT itm_no,
       price,
       price_code,
       valid_from_dt,
       valid_to_dt,
       mn,
       MIN( valid_from_dt ) OVER ( PARTITION BY itm_no, mn ) AS new_valid_from_dt,
       MAX( valid_to_dt ) OVER ( PARTITION BY itm_no, mn ) AS new_valid_to_dt
FROM (
  SELECT *
  FROM (
    SELECT s.*,
           -- valid_to_dt is added as a tie-breaker so that rows sharing a
           -- valid_from_dt are numbered deterministically, using the same
           -- ordering as the MATCH_RECOGNIZE version of this query.
           ROW_NUMBER() OVER ( PARTITION BY itm_no ORDER BY valid_from_dt, valid_to_dt ) AS rn
    FROM ssb_price s
  )
  MODEL
    PARTITION BY ( itm_no )
    DIMENSION BY ( rn )
    -- mn starts at 1 for every row; the rule below rewrites it for rn > 1.
    MEASURES ( price, price_code, valid_from_dt, valid_to_dt, 1 AS mn )
    RULES (
      -- A row keeps the previous row's mn when it starts the day after the
      -- previous row ended and has the same price; otherwise mn is incremented.
      -- NOTE(review): the rule reads mn[rn - 1], so it depends on cells being
      -- evaluated in ascending rn order -- confirm on the target Oracle version.
      mn[rn>1] = mn[cv(rn)-1]
                 +
                 CASE
                 WHEN valid_from_dt[cv(rn)] = valid_to_dt[cv(rn)-1] + 1
                 AND  price[cv(rn)] = price[cv(rn) - 1]
                 THEN 0
                 ELSE 1
                 END
    )
)
其中,对于示例数据:
-- Sample data as a single CREATE TABLE ... AS SELECT.
-- The character columns are cast explicitly so the table matches the question's
-- DDL (VARCHAR2(10)); without the casts the column types would be derived from
-- the string literals instead.
CREATE TABLE ssb_price (itm_no, price, price_code, valid_from_dt, valid_to_dt) AS
SELECT CAST( itm_no AS VARCHAR2(10) ),
       price,
       CAST( price_code AS VARCHAR2(10) ),
       valid_from_dt,
       valid_to_dt
FROM (
  SELECT 'A001' AS itm_no, 83 AS price, 'AB' AS price_code, DATE '2021-01-01' AS valid_from_dt, DATE '2021-01-05' AS valid_to_dt FROM DUAL UNION ALL
  SELECT 'A001', 83, 'AB',  DATE '2021-01-06', DATE '2021-01-12' FROM DUAL UNION ALL
  SELECT 'A001', 98, 'SPQ', DATE '2021-01-13', DATE '2021-01-17' FROM DUAL UNION ALL
  SELECT 'A001', 83, 'AB',  DATE '2021-01-19', DATE '2021-01-24' FROM DUAL UNION ALL
  SELECT 'A001', 83, 'DE',  DATE '2021-01-25', DATE '2021-01-30' FROM DUAL UNION ALL
  SELECT 'A001', 83, 'DE',  DATE '2021-01-31', DATE '2021-02-04' FROM DUAL UNION ALL
  SELECT 'A001', 77, 'XY',  DATE '2021-02-07', DATE '2021-02-12' FROM DUAL UNION ALL
  SELECT 'A001', 77, 'XY',  DATE '2021-02-15', DATE '2021-02-20' FROM DUAL UNION ALL
  SELECT 'A001', 62, 'SD',  DATE '2021-02-23', DATE '2021-02-26' FROM DUAL UNION ALL
  SELECT 'A001', 59, 'SD',  DATE '2021-02-26', DATE '2021-03-03' FROM DUAL
);
输出:
| ITM_NO | PRICE | PRICE_CODE | VALID_FROM_DT | VALID_TO_DT | NEW_VALID_FROM_DT | NEW_VALID_TO_DT |
|--------|-------|------------|---------------|-------------|-------------------|-----------------|
| A001 | 83 | AB | 2021-01-01 00:00:00 | 2021-01-05 00:00:00 | 2021-01-01 00:00:00 | 2021-01-12 00:00:00 |
| A001 | 83 | AB | 2021-01-06 00:00:00 | 2021-01-12 00:00:00 | 2021-01-01 00:00:00 | 2021-01-12 00:00:00 |
| A001 | 98 | SPQ | 2021-01-13 00:00:00 | 2021-01-17 00:00:00 | 2021-01-13 00:00:00 | 2021-01-17 00:00:00 |
| A001 | 83 | AB | 2021-01-19 00:00:00 | 2021-01-24 00:00:00 | 2021-01-19 00:00:00 | 2021-02-04 00:00:00 |
| A001 | 83 | DE | 2021-01-25 00:00:00 | 2021-01-30 00:00:00 | 2021-01-19 00:00:00 | 2021-02-04 00:00:00 |
| A001 | 83 | DE | 2021-01-31 00:00:00 | 2021-02-04 00:00:00 | 2021-01-19 00:00:00 | 2021-02-04 00:00:00 |
| A001 | 77 | XY | 2021-02-07 00:00:00 | 2021-02-12 00:00:00 | 2021-02-07 00:00:00 | 2021-02-12 00:00:00 |
| A001 | 77 | XY | 2021-02-15 00:00:00 | 2021-02-20 00:00:00 | 2021-02-15 00:00:00 | 2021-02-20 00:00:00 |
| A001 | 62 | SD | 2021-02-23 00:00:00 | 2021-02-26 00:00:00 | 2021-02-23 00:00:00 | 2021-02-26 00:00:00 |
| A001 | 59 | SD | 2021-02-26 00:00:00 | 2021-03-03 00:00:00 | 2021-02-26 00:00:00 | 2021-03-03 00:00:00 |
db<>fiddle here