分层查询:connect_by_root不给根值

Hierarchical query: connect_by_root does not give root value

我这里有一个这样的数据集:

BP_ID VALID_FROM VALID_TO LIMIT
1 15.05.1999 16.07.2000 100
1 01.01.2020 10.01.2020 100
1 10.01.2020 31.12.9999 100

我想把这个 BP_ID 下首尾相接的记录“聚合”成单条记录,即得到:

BP_ID VALID_FROM VALID_TO LIMIT
1 15.05.1999 16.07.2000 100
1 01.01.2020 31.12.9999 100

我认为实现此目的的一个好主意是使用分层查询并找到根 valid_from。所以我尝试了:

-- Question's original attempt, kept verbatim: it tries to merge contiguous
-- validity periods of a bp_id by walking the rows with CONNECT BY and reporting
-- the valid_from of the root row of each walk.
with pretab as (

-- Inline sample data: three validity periods for bp_id 1, all with limit 100.
select 1 as bp_id, 
       to_date('15.05.1999', 'dd.mm.yyyy') as valid_from,
       to_date('16.07.2000', 'dd.mm.yyyy') as valid_to,
       100 as limit 
  from dual
union
    select 1 as bp_id, 
           to_date('01.01.2020', 'dd.mm.yyyy') as valid_from,
           to_date('10.01.2020', 'dd.mm.yyyy') as valid_to,
           100 as limit 
      from dual
union
    select 1 as bp_id, 
           to_date('10.01.2020', 'dd.mm.yyyy') as valid_from,
          to_date('31.12.9999', 'dd.mm.yyyy') as valid_to,
           100 as limit 
      from dual

),
-- start_dummy compares each row with the NEXT row of the same bp_id (LEAD):
-- it is 1 when this row's valid_to differs from the next row's valid_from, or
-- when this row's limit differs from the next row's limit. When there is no
-- next row, 31.12.9999 and 0 are substituted for the missing values.
-- Consequence: the flag marks the LAST row of each contiguous run, not the first.
pretab2 as (
select t1.*, case when 
                  valid_to != coalesce(lead(valid_from) over (partition by bp_id order by valid_from), to_date('31.12.9999', 'dd.mm.yyyy'))
                  or limit !=  coalesce(lead(limit) over (partition by bp_id order by valid_from), 0)                  
              then 1 
              else 0 end as start_dummy from pretab t1
)


-- The walk goes from a parent's valid_to to a child's valid_from (forward in
-- time) and starts at the rows flagged above. Because those rows are the ends
-- of the runs, they have no children, and CONNECT_BY_ROOT(valid_from) simply
-- returns their own valid_from: 10.01.2020 for the second sample run instead
-- of the run's first valid_from, 01.01.2020.
select bp_id, connect_by_root(valid_from), valid_to, limit from pretab2
CONNECT BY PRIOR bp_id = bp_id
and prior trunc(valid_to) = trunc(valid_from)
and prior limit = limit
start with start_dummy = 1;

不幸的是,connect_by_root() 在第二行没有返回 01.01.2020。这是为什么?我该如何修改查询才能得到这个值?

您的 start_dummy 计算似乎没有标记出查询应当作为起点的行:它用 LEAD 与下一行比较,因此标记的是每段连续区间的最后一行,而不是第一行。

如果您想使用 CONNECT BY,下面这个示例可以返回您期望的结果:

-- Merge chained validity periods: returns one row per chain of rows that share
-- bp_id and limit and in which each valid_to falls on the same day as the next
-- row's valid_from. valid_from is taken from the first row of the chain,
-- valid_to from the last.
WITH
    -- Inline sample data.
    pretab
    AS
        (SELECT 1                                        AS bp_id,
                TO_DATE ('15.05.1999', 'dd.mm.yyyy')     AS valid_from,
                TO_DATE ('01.01.2020', 'dd.mm.yyyy')     AS valid_to,
                100                                      AS LIMIT
           FROM DUAL
         UNION ALL
         SELECT 1                                        AS bp_id,
                TO_DATE ('01.01.2000', 'dd.mm.yyyy')     AS valid_from,
                TO_DATE ('10.01.2000', 'dd.mm.yyyy')     AS valid_to,
                100                                      AS LIMIT
           FROM DUAL
         UNION ALL
         SELECT 1                                        AS bp_id,
                TO_DATE ('10.01.2000', 'dd.mm.yyyy')     AS valid_from,
                TO_DATE ('31.12.9999', 'dd.mm.yyyy')     AS valid_to,
                100                                      AS LIMIT
           FROM DUAL),
    -- A row starts a chain when no other row of the same bp_id and limit ends
    -- on the day this row begins (same test as the CONNECT BY condition below).
    chain_start
    AS
        (SELECT cur.bp_id,
                cur.valid_from,
                cur.valid_to,
                cur.LIMIT,
                CASE
                    WHEN EXISTS
                             (SELECT 1
                                FROM pretab prev
                               WHERE     prev.bp_id = cur.bp_id
                                     AND TRUNC (prev.valid_to) = TRUNC (cur.valid_from)
                                     AND prev.LIMIT = cur.LIMIT)
                    THEN 0
                    ELSE 1
                END                                      AS is_chain_start
           FROM pretab cur)
    SELECT bp_id,
           CONNECT_BY_ROOT (valid_from)     AS valid_from,
           valid_to,
           LIMIT
      FROM chain_start
     -- Keep only the last row of every walk. A global FETCH FIRST 1 ROWS ONLY
     -- would instead keep a single row for the whole result and drop every
     -- other chain and every other bp_id.
     WHERE CONNECT_BY_ISLEAF = 1
START WITH is_chain_start = 1
CONNECT BY     PRIOR bp_id = bp_id
           AND PRIOR TRUNC (valid_to) = TRUNC (valid_from)
           AND PRIOR LIMIT = LIMIT
  ORDER BY bp_id, valid_from;

另一个可行的解决方案是使用 GROUP BY。但只有在您确信日期之间不存在任何间隙时,它才有效。

-- Collapse all validity periods of a (bp_id, limit) pair into a single row that
-- spans the earliest valid_from to the latest valid_to.
-- Caveat: nothing here checks that the periods are contiguous, so periods
-- separated by a gap (or by a period with a different limit) are merged too.
WITH
    -- Inline sample data; the rows are distinct, so UNION ALL avoids a
    -- pointless duplicate-elimination step.
    pretab
    AS
        (SELECT 1                                        AS bp_id,
                TO_DATE ('15.05.1999', 'dd.mm.yyyy')     AS valid_from,
                TO_DATE ('01.01.2020', 'dd.mm.yyyy')     AS valid_to,
                100                                      AS LIMIT
           FROM DUAL
         UNION ALL
         SELECT 1                                        AS bp_id,
                TO_DATE ('01.01.2000', 'dd.mm.yyyy')     AS valid_from,
                TO_DATE ('10.01.2000', 'dd.mm.yyyy')     AS valid_to,
                100                                      AS LIMIT
           FROM DUAL
         UNION ALL
         SELECT 1                                        AS bp_id,
                TO_DATE ('10.01.2000', 'dd.mm.yyyy')     AS valid_from,
                TO_DATE ('31.12.9999', 'dd.mm.yyyy')     AS valid_to,
                100                                      AS LIMIT
           FROM DUAL)
  SELECT bp_id,
         MIN (valid_from)     AS valid_from,
         MAX (valid_to)       AS valid_to,
         LIMIT
    FROM pretab
GROUP BY bp_id, LIMIT
ORDER BY bp_id, MIN (valid_from);

从 Oracle 12 开始,您可以使用 MATCH_RECOGNIZE 进行逐行(row-by-row)的模式匹配处理:

-- Merge validity periods per bp_id: consecutive rows (ordered by valid_from,
-- valid_to) are combined while they keep the same limit and the next row starts
-- no later than one day after the latest valid_to seen so far in the match.
with pretab (bp_id, valid_from, valid_to, limit) as (
  select 1, DATE '1999-05-15', DATE '2020-01-01', 100 from dual union all
  select 1, DATE '2000-01-01', DATE '2000-01-10', 100 from dual union all
  select 1, DATE '2000-01-10', DATE '9999-12-31', 100 from dual union all
  select 2, DATE '2000-01-01', DATE '2001-12-31', 100 from dual union all
  select 2, DATE '2002-01-01', DATE '2002-12-31', 100 from dual union all
  select 2, DATE '2003-01-01', DATE '2003-12-31', 200 from dual union all
  select 2, DATE '2004-01-01', DATE '2006-12-31', 100 from dual union all
  select 2, DATE '2005-01-01', DATE '2005-12-31', 100 from dual
)
SELECT bp_id,
       limit,
       valid_from,
       valid_to
FROM   pretab
MATCH_RECOGNIZE(
  PARTITION BY bp_id
  ORDER     BY valid_from, valid_to
  MEASURES
    FIRST(limit)      AS limit,
    FIRST(valid_from) AS valid_from,
    MAX(valid_to)     AS valid_to
  ONE ROW PER MATCH
  -- Zero or more rows that merge with their successor, then the closing row
  -- of the run (last_in_run has no DEFINE entry, so it matches any row).
  PATTERN (merge_with_next* last_in_run)
  DEFINE
    -- The test looks ahead with NEXT() on purpose: an aggregate in DEFINE
    -- already includes the row being tested, so comparing the current row's
    -- valid_from with MAX(valid_to) would be true for any row whose valid_to
    -- is not before its own valid_from - 1, and gaps would never split a run.
    -- NEXT() returns NULL on the last row of a partition, which ends the run.
    merge_with_next AS
          NEXT(limit) = limit
      AND NEXT(valid_from) - INTERVAL '1' DAY <= MAX(valid_to)
)
ORDER BY bp_id, valid_from;

输出如下:

BP_ID LIMIT VALID_FROM VALID_TO
1 100 1999-05-15 00:00:00 9999-12-31 00:00:00
2 100 2000-01-01 00:00:00 2002-12-31 00:00:00
2 200 2003-01-01 00:00:00 2003-12-31 00:00:00
2 100 2004-01-01 00:00:00 2006-12-31 00:00:00

db<>fiddle here

同样,如果您的数据没有间隙、所有区间都是连续的,您可以尝试“会话化”(sessionization):添加一个会话 ID,每当“limit”发生变化时它就递增(我把该列重命名为 lim,因为 limit 可能是保留字)。首先,使用 OLAP 函数 LAG() 添加一个计数器:如果当前行的 lim 与上一行的 lim 不同,计数器为 1,否则为 0。其次,在外层查询中对该计数器做累计求和(running sum),这样就得到了会话 ID。最后,从带有会话 ID 的查询中选出最小的起始日期和最大的截止日期,并按 bp_id、lim 和 session_id 分组。

-- Sessionization: consecutive rows of a bp_id that carry the same lim receive
-- the same session_id; every session is then reduced to its earliest valid_from
-- and latest valid_to. Gaps between periods are not detected.
WITH
-- Inline sample data: three periods with lim 100 followed by three with lim 200.
periods (bp_id, valid_from, valid_to, lim) AS (
    SELECT 1, DATE '1999-05-15', DATE '2020-01-01', 100 FROM dual UNION ALL
    SELECT 1, DATE '2020-01-01', DATE '2020-01-10', 100 FROM dual UNION ALL
    SELECT 1, DATE '2020-01-10', DATE '2020-05-15', 100 FROM dual UNION ALL
    SELECT 1, DATE '2020-05-15', DATE '2021-01-01', 200 FROM dual UNION ALL
    SELECT 1, DATE '2021-01-01', DATE '2021-01-10', 200 FROM dual UNION ALL
    SELECT 1, DATE '2021-01-10', DATE '9999-12-31', 200 FROM dual
),
-- counter = 1 on rows whose lim differs from the previous row's lim, else 0.
-- On the first row of a bp_id LAG() is NULL, the comparison is unknown, and the
-- ELSE branch yields 0.
change_flags AS (
    SELECT
        bp_id,
        valid_from,
        valid_to,
        lim,
        CASE
            WHEN lim <> LAG(lim) OVER (PARTITION BY bp_id ORDER BY valid_from) THEN 1
            ELSE 0
        END AS counter
    FROM change_source
),
-- Running total of the change flags: increases by one at every lim change.
sessions AS (
    SELECT
        bp_id,
        valid_from,
        valid_to,
        lim,
        SUM(counter) OVER (PARTITION BY bp_id ORDER BY valid_from) AS session_id
    FROM change_flags
)
SELECT
    bp_id,
    MIN(valid_from) AS valid_from,
    MAX(valid_to)   AS valid_to,
    lim
FROM sessions
GROUP BY
    bp_id,
    lim,
    session_id;
-- out  bp_id | valid_from |  valid_to  | lim 
-- out -------+------------+------------+-----
-- out      1 | 1999-05-15 | 2020-05-15 | 100
-- out      1 | 2020-05-15 | 9999-12-31 | 200

正确的 CONNECT BY 版本可能如下所示:

-- Merge contiguous validity periods with CONNECT BY: one output row per run of
-- rows that share bp_id and limit and in which every valid_to falls on the same
-- day as the next row's valid_from.
with pretab as (
    -- Inline sample data: one contiguous run for bp_id 1 and one for bp_id 2.
    select 1 as bp_id,
           to_date('15.05.1999', 'dd.mm.yyyy') as valid_from,
           to_date('01.01.2000', 'dd.mm.yyyy') as valid_to,
           100 as limit
      from dual
    union all
    select 1 as bp_id,
           to_date('01.01.2000', 'dd.mm.yyyy') as valid_from,
           to_date('10.01.2000', 'dd.mm.yyyy') as valid_to,
           100 as limit
      from dual
    union all
    select 1 as bp_id,
           to_date('10.01.2000', 'dd.mm.yyyy') as valid_from,
           to_date('31.12.9999', 'dd.mm.yyyy') as valid_to,
           100 as limit
      from dual
    union all
    select 2 as bp_id,
           to_date('15.05.1999', 'dd.mm.yyyy') as valid_from,
           to_date('01.01.2002', 'dd.mm.yyyy') as valid_to,
           200 as limit
      from dual
    union all
    select 2 as bp_id,
           to_date('01.01.2002', 'dd.mm.yyyy') as valid_from,
           to_date('10.01.2002', 'dd.mm.yyyy') as valid_to,
           200 as limit
      from dual
    union all
    select 2 as bp_id,
           to_date('10.01.2002', 'dd.mm.yyyy') as valid_from,
           to_date('31.12.9999', 'dd.mm.yyyy') as valid_to,
           200 as limit
      from dual
),
-- start_dummy = 1 on the first row of every run: the previous row (by
-- valid_from) does not end on the day this row starts, or has another limit.
-- The LAG defaults (valid_from - 1, limit - 1) force a mismatch on the first
-- row of a bp_id. Dates are TRUNCed so the flag uses the same day-level
-- comparison as the CONNECT BY condition below.
pretab2 as (
    select t1.bp_id,
           t1.valid_from,
           t1.valid_to,
           t1.limit,
           case
               when trunc(t1.valid_from)
                        != trunc(lag(t1.valid_to, 1, t1.valid_from - 1)
                                     over (partition by t1.bp_id order by t1.valid_from))
                 or t1.limit
                        != lag(t1.limit, 1, t1.limit - 1)
                               over (partition by t1.bp_id order by t1.valid_from)
               then 1
               else 0
           end as start_dummy
      from pretab t1
)
select bp_id,
       connect_by_root(valid_from) as valid_from,
       valid_to,
       limit
  from pretab2
 -- Keep the last row of every walk, so each run yields exactly one row.
 -- Ranking by LEVEL per bp_id and keeping rank 1 would return only the longest
 -- run of a bp_id and silently drop its other runs.
 where connect_by_isleaf = 1
 start with start_dummy = 1
connect by prior bp_id = bp_id
       and prior trunc(valid_to) = trunc(valid_from)
       and prior limit = limit
 order by bp_id, valid_from;

返回结果:

BP_ID   VALID_FROM  VALID_TO    LIMIT
1   1999-05-15 00:00:00 9999-12-31 00:00:00 100
2   1999-05-15 00:00:00 9999-12-31 00:00:00 200