Oracle:一种"缺口与孤岛"(gaps and islands)类型的查询

Oracle: a sort of gaps-and-islands query

与其写长句子和段落,不如展示数据和我想要实现的目标:

-- Sample price-history table: one row per item, price, price code and
-- validity period (valid_from_dt .. valid_to_dt).
create table ssb_price (
    itm_no        varchar2(10),
    price         number,
    price_code    varchar2(10),
    valid_from_dt date,
    valid_to_dt   date
);

-- Dates are written as ANSI DATE literals rather than strings such as
-- '01-JAN-21', so the inserts do not depend on the session's NLS_DATE_FORMAT /
-- NLS_DATE_LANGUAGE settings. Explicit column lists keep the inserts valid if
-- the table's column order ever changes.
insert into ssb_price (itm_no, price, price_code, valid_from_dt, valid_to_dt)
values ('A001', 83, 'AB', date '2021-01-01', date '2021-01-05');
insert into ssb_price (itm_no, price, price_code, valid_from_dt, valid_to_dt)
values ('A001', 83, 'AB', date '2021-01-06', date '2021-01-12');
insert into ssb_price (itm_no, price, price_code, valid_from_dt, valid_to_dt)
values ('A001', 98, 'SPQ', date '2021-01-13', date '2021-01-17');
insert into ssb_price (itm_no, price, price_code, valid_from_dt, valid_to_dt)
values ('A001', 83, 'AB', date '2021-01-19', date '2021-01-24');
insert into ssb_price (itm_no, price, price_code, valid_from_dt, valid_to_dt)
values ('A001', 83, 'DE', date '2021-01-25', date '2021-01-30');
insert into ssb_price (itm_no, price, price_code, valid_from_dt, valid_to_dt)
values ('A001', 83, 'DE', date '2021-01-31', date '2021-02-04');
insert into ssb_price (itm_no, price, price_code, valid_from_dt, valid_to_dt)
values ('A001', 77, 'XY', date '2021-02-07', date '2021-02-12');
insert into ssb_price (itm_no, price, price_code, valid_from_dt, valid_to_dt)
values ('A001', 77, 'XY', date '2021-02-15', date '2021-02-20');
insert into ssb_price (itm_no, price, price_code, valid_from_dt, valid_to_dt)
values ('A001', 62, 'SD', date '2021-02-23', date '2021-02-26');
insert into ssb_price (itm_no, price, price_code, valid_from_dt, valid_to_dt)
values ('A001', 59, 'SD', date '2021-02-26', date '2021-03-03');

对于给定的 itm_no 和价格,如果相邻两条记录的日期是连续的(后一条的开始日期等于前一条的结束日期加 1 天),就应该把它们视为同一个区间,并得到该区间的起止日期。例如价格 77 的两条记录之间隔了 2 天(2 月 13 日和 14 日),因此它们不连续。所需的输出如下所示(原为 Excel 截图):

我之前在另一个帖子里问过这个问题,但那个帖子已经沉了,没有得到任何反馈,所以重新发了这一帖。如果应该把这个帖子与之前的合并,请告诉我。

这本质上是一个"缺口与孤岛"(gaps-and-islands)问题。不过你并不想通过聚合来减少行数,因此最后一步应使用窗口函数,而不是 GROUP BY。

在你的数据中,各时间段排列整齐、互不重叠。因此可以用 lag() 加累计求和来划分分组:

-- Gaps-and-islands without collapsing rows: every source row is kept and is
-- annotated with the start/end of the contiguous date range ("island") it
-- belongs to.
--
-- Islands are formed per (itm_no, price). price_code is deliberately NOT part
-- of the partition: the requirement is stated per item and price, and rows
-- with the same price but different price codes must merge when their dates
-- are contiguous.
with with_prev as (
    -- End date of the previous period for the same item and price
    -- (NULL for the first period of each item/price).
    select p.itm_no,
           p.price,
           p.price_code,
           p.valid_from_dt,
           p.valid_to_dt,
           lag(p.valid_to_dt) over (partition by p.itm_no, p.price
                                    order by p.valid_from_dt) as prev_valid_to_dt
    from ssb_price p
),
with_grp as (
    -- Running count of "island starts": a row starts a new island unless it
    -- begins exactly one day after the previous period ended. A NULL
    -- prev_valid_to_dt makes the comparison unknown, so the first row falls
    -- into the ELSE branch and starts island 1.
    select w.itm_no,
           w.price,
           w.price_code,
           w.valid_from_dt,
           w.valid_to_dt,
           w.prev_valid_to_dt,
           sum(case
                   when w.valid_from_dt = w.prev_valid_to_dt + interval '1' day then 0
                   else 1
               end) over (partition by w.itm_no, w.price
                          order by w.valid_from_dt) as grp
    from with_prev w
)
select g.itm_no,
       g.price,
       g.price_code,
       g.valid_from_dt,
       g.valid_to_dt,
       g.prev_valid_to_dt,
       g.grp,
       -- grp is only unique within (itm_no, price), so all three columns are
       -- needed to identify an island.
       min(g.valid_from_dt) over (partition by g.itm_no, g.price, g.grp) as new_valid_from_dt,
       max(g.valid_to_dt) over (partition by g.itm_no, g.price, g.grp) as new_valid_to_dt
from with_grp g
order by g.itm_no, g.valid_from_dt;

这里有一个 db<>fiddle 示例。

从 Oracle 12 开始,您可以使用 MATCH_RECOGNIZE:

-- Gaps-and-islands with MATCH_RECOGNIZE: each match is one island of rows for
-- an item whose periods are contiguous and share the same price. All rows are
-- kept; the island bounds are added with window functions over the match
-- number.
SELECT itm_no,
       price,
       price_code,
       valid_from_dt,
       valid_to_dt,
       MIN( valid_from_dt ) OVER ( PARTITION BY itm_no, mnum ) AS new_valid_from_dt,
       MAX( valid_to_dt ) OVER ( PARTITION BY itm_no, mnum ) AS new_valid_to_dt
FROM   ssb_price
MATCH_RECOGNIZE(
  PARTITION BY itm_no
  ORDER     BY valid_from_dt, valid_to_dt
  MEASURES
    MATCH_NUMBER() AS mnum
  ALL ROWS PER MATCH
  -- start_range has no DEFINE entry, so any row can start an island.
  PATTERN ( start_range continued_range* )
  DEFINE
    -- A row continues the island when it starts the day after the previous
    -- row ended and carries the same price.
    continued_range AS (
      valid_from_dt = PREV( valid_to_dt ) + 1
      AND price = PREV( price )
    )
)
-- Without an explicit ORDER BY the row order of the result is not guaranteed.
ORDER BY itm_no, valid_from_dt, valid_to_dt;

并且,从 Oracle 10g 开始,您可以使用 MODEL 子句:

-- Gaps-and-islands with the MODEL clause: rows of each item are numbered in
-- date order (rn) and an island number (mn) is carried from row to row. All
-- rows are kept; the island bounds are added with window functions over mn.
SELECT itm_no,
       price,
       price_code,
       valid_from_dt,
       valid_to_dt,
       mn,
       MIN( valid_from_dt ) OVER ( PARTITION BY itm_no, mn ) AS new_valid_from_dt,
       MAX( valid_to_dt ) OVER ( PARTITION BY itm_no, mn ) AS new_valid_to_dt
FROM   (
  SELECT *
  FROM   (
    SELECT s.*,
           -- valid_to_dt is a tie-breaker so the numbering is deterministic
           -- when two rows of an item share the same valid_from_dt.
           ROW_NUMBER() OVER ( PARTITION BY itm_no ORDER BY valid_from_dt, valid_to_dt ) AS rn
    FROM   ssb_price s
  )
  MODEL
    PARTITION BY ( itm_no )
    DIMENSION BY ( rn )
    -- mn is initialised to 1 for every row and rewritten by the rule below.
    MEASURES ( price, price_code, valid_from_dt, valid_to_dt, 1 AS mn )
    RULES (
      -- For every row after the first: take the previous row's island number
      -- and add 0 when this row starts the day after the previous row ended
      -- at the same price, otherwise add 1 (a new island begins).
      -- NOTE(review): the rule reads mn[rn-1], so it relies on cells being
      -- evaluated in ascending rn order - confirm, or add an explicit
      -- ORDER BY rn to the rule.
      mn[rn>1] = mn[cv(rn)-1]
                 +
                 CASE
                 WHEN valid_from_dt[cv(rn)] = valid_to_dt[cv(rn)-1] + 1
                 AND  price[cv(rn)] = price[cv(rn) - 1]
                 THEN 0
                 ELSE 1
                 END
    )
)
-- Without an explicit ORDER BY the row order of the result is not guaranteed.
ORDER BY itm_no, valid_from_dt, valid_to_dt;

对于以下示例数据:

-- Sample data built in a single CREATE TABLE ... AS SELECT: one SELECT per
-- row, combined with UNION ALL, using ANSI DATE literals for the date columns.
CREATE TABLE ssb_price (itm_no, price, price_code, valid_from_dt, valid_to_dt) AS
            SELECT 'A001', 83, 'AB',  DATE '2021-01-01', DATE '2021-01-05' FROM DUAL
  UNION ALL SELECT 'A001', 83, 'AB',  DATE '2021-01-06', DATE '2021-01-12' FROM DUAL
  UNION ALL SELECT 'A001', 98, 'SPQ', DATE '2021-01-13', DATE '2021-01-17' FROM DUAL
  UNION ALL SELECT 'A001', 83, 'AB',  DATE '2021-01-19', DATE '2021-01-24' FROM DUAL
  UNION ALL SELECT 'A001', 83, 'DE',  DATE '2021-01-25', DATE '2021-01-30' FROM DUAL
  UNION ALL SELECT 'A001', 83, 'DE',  DATE '2021-01-31', DATE '2021-02-04' FROM DUAL
  UNION ALL SELECT 'A001', 77, 'XY',  DATE '2021-02-07', DATE '2021-02-12' FROM DUAL
  UNION ALL SELECT 'A001', 77, 'XY',  DATE '2021-02-15', DATE '2021-02-20' FROM DUAL
  UNION ALL SELECT 'A001', 62, 'SD',  DATE '2021-02-23', DATE '2021-02-26' FROM DUAL
  UNION ALL SELECT 'A001', 59, 'SD',  DATE '2021-02-26', DATE '2021-03-03' FROM DUAL;

输出:

ITM_NO PRICE PRICE_CODE VALID_FROM_DT VALID_TO_DT NEW_VALID_FROM_DT NEW_VALID_TO_DT
A001 83 AB 2021-01-01 00:00:00 2021-01-05 00:00:00 2021-01-01 00:00:00 2021-01-12 00:00:00
A001 83 AB 2021-01-06 00:00:00 2021-01-12 00:00:00 2021-01-01 00:00:00 2021-01-12 00:00:00
A001 98 SPQ 2021-01-13 00:00:00 2021-01-17 00:00:00 2021-01-13 00:00:00 2021-01-17 00:00:00
A001 83 AB 2021-01-19 00:00:00 2021-01-24 00:00:00 2021-01-19 00:00:00 2021-02-04 00:00:00
A001 83 DE 2021-01-25 00:00:00 2021-01-30 00:00:00 2021-01-19 00:00:00 2021-02-04 00:00:00
A001 83 DE 2021-01-31 00:00:00 2021-02-04 00:00:00 2021-01-19 00:00:00 2021-02-04 00:00:00
A001 77 XY 2021-02-07 00:00:00 2021-02-12 00:00:00 2021-02-07 00:00:00 2021-02-12 00:00:00
A001 77 XY 2021-02-15 00:00:00 2021-02-20 00:00:00 2021-02-15 00:00:00 2021-02-20 00:00:00
A001 62 SD 2021-02-23 00:00:00 2021-02-26 00:00:00 2021-02-23 00:00:00 2021-02-26 00:00:00
A001 59 SD 2021-02-26 00:00:00 2021-03-03 00:00:00 2021-02-26 00:00:00 2021-03-03 00:00:00

db<>fiddle 示例见此处。