分层查询:connect_by_root不给根值
Hierarchical query: connect_by_root does not give root value
我这里有一个这样的数据集:
BP_ID | VALID_FROM | VALID_TO | LIMIT |
---|---|---|---|
1 | 15.05.1999 | 16.07.2000 | 100 |
1 | 01.01.2020 | 10.01.2020 | 100 |
1 | 10.01.2020 | 31.12.9999 | 100 |
我想把这个 BP_ID 下首尾相接的记录“聚合”成单条记录:
BP_ID | VALID_FROM | VALID_TO | LIMIT |
---|---|---|---|
1 | 15.05.1999 | 16.07.2000 | 100 |
1 | 01.01.2020 | 31.12.9999 | 100 |
我认为实现此目的的一个好主意是使用分层查询并找到根 valid_from。所以我尝试了:
-- Sample data: three validity periods for bp_id 1, all with limit 100.
-- The first period ends on 16.07.2000; the second period's valid_to
-- (10.01.2020) equals the third period's valid_from.
with pretab as (
select 1 as bp_id,
to_date('15.05.1999', 'dd.mm.yyyy') as valid_from,
to_date('16.07.2000', 'dd.mm.yyyy') as valid_to,
100 as limit
from dual
union
select 1 as bp_id,
to_date('01.01.2020', 'dd.mm.yyyy') as valid_from,
to_date('10.01.2020', 'dd.mm.yyyy') as valid_to,
100 as limit
from dual
union
select 1 as bp_id,
to_date('10.01.2020', 'dd.mm.yyyy') as valid_from,
to_date('31.12.9999', 'dd.mm.yyyy') as valid_to,
100 as limit
from dual
),
-- pretab2 adds start_dummy, the flag consumed by START WITH below.
-- NOTE(review): the flag compares each row with the NEXT row (LEAD), so it is
-- 1 on rows whose successor does not continue them, not on rows that lack a
-- predecessor. With the sample rows above it flags the first and the third
-- period, but not the period starting 01.01.2020.
pretab2 as (
select t1.*, case when
-- true when the next period (ordered by valid_from) does not start at this
-- row's valid_to; a row without a successor is compared with 31.12.9999
valid_to != coalesce(lead(valid_from) over (partition by bp_id order by valid_from), to_date('31.12.9999', 'dd.mm.yyyy'))
-- true when the next period has a different limit; a row without a successor
-- is compared with 0, so any non-zero limit counts as different
or limit != coalesce(lead(limit) over (partition by bp_id order by valid_from), 0)
then 1
else 0 end as start_dummy from pretab t1
)
-- Walk the hierarchy from every flagged row to rows with the same bp_id and
-- limit whose valid_from falls on the same day as the parent's valid_to, and
-- report the valid_from of the row each walk started at.
select bp_id, connect_by_root(valid_from), valid_to, limit from pretab2
CONNECT BY PRIOR bp_id = bp_id
and prior trunc(valid_to) = trunc(valid_from)
and prior limit = limit
start with start_dummy = 1;
不幸的是,connect_by_root() 在第二行没有返回 01.01.2020。这是为什么?我应该如何修改查询才能得到这个值?
您的 start_dummy 计算似乎没有标记出查询应当作为起点的那些行。
如果您想使用 CONNECT BY,下面这个示例可以返回您期望的结果:
-- Merge periods that touch (previous valid_to and next valid_from fall on the
-- same day, same bp_id and limit) into one row per chain.
--
-- The walk starts only at rows that have no predecessor and keeps only the
-- last row of each chain, so every chain of every bp_id yields exactly one
-- row. Ordering by LEVEL and fetching a single row would instead return one
-- row for the whole result set, whatever the number of chains.
--
-- For the sample rows below the result is:
--   1 | 15.05.1999 | 01.01.2020 | 100   (no period starts on 01.01.2020)
--   1 | 01.01.2000 | 31.12.9999 | 100
WITH
    pretab (bp_id, valid_from, valid_to, LIMIT)
    AS
        (SELECT 1,
                TO_DATE ('15.05.1999', 'dd.mm.yyyy'),
                TO_DATE ('01.01.2020', 'dd.mm.yyyy'),
                100
           FROM DUAL
         UNION ALL
         SELECT 1,
                TO_DATE ('01.01.2000', 'dd.mm.yyyy'),
                TO_DATE ('10.01.2000', 'dd.mm.yyyy'),
                100
           FROM DUAL
         UNION ALL
         SELECT 1,
                TO_DATE ('10.01.2000', 'dd.mm.yyyy'),
                TO_DATE ('31.12.9999', 'dd.mm.yyyy'),
                100
           FROM DUAL)
    SELECT p.bp_id,
           CONNECT_BY_ROOT (p.valid_from) AS valid_from,
           p.valid_to,
           p.LIMIT
      FROM pretab p
     -- keep only the final period of each chain; its valid_to closes the chain
     WHERE CONNECT_BY_ISLEAF = 1
-- a chain starts at a period that no other period leads into
START WITH NOT EXISTS
               (SELECT 1
                  FROM pretab prev
                 WHERE     prev.bp_id = p.bp_id
                       AND prev.LIMIT = p.LIMIT
                       AND TRUNC (prev.valid_to) = TRUNC (p.valid_from))
CONNECT BY     PRIOR p.bp_id = p.bp_id
           AND PRIOR TRUNC (p.valid_to) = TRUNC (p.valid_from)
           AND PRIOR p.LIMIT = p.LIMIT
  -- valid_to is unambiguous here (valid_from is also a select-list alias)
  ORDER BY p.bp_id, p.valid_to;
另一个可行的解决方案是使用 GROUP BY。只有在您确信日期之间不存在任何间隔时,这种做法才有效。
-- Collapse every bp_id / limit pair into a single row spanning its earliest
-- valid_from to its latest valid_to. Gaps between periods are not detected:
-- all rows of a group are merged regardless of whether they touch.
with pretab (bp_id, valid_from, valid_to, limit) as (
    select 1, date '1999-05-15', date '2020-01-01', 100 from dual
    union
    select 1, date '2000-01-01', date '2000-01-10', 100 from dual
    union
    select 1, date '2000-01-10', date '9999-12-31', 100 from dual
)
select
    bp_id,
    min (valid_from),
    max (valid_to),
    limit
from pretab
group by
    limit,
    bp_id;
从 Oracle 12 开始,您可以使用 MATCH_RECOGNIZE 进行逐行的模式匹配处理:
-- Merge overlapping or adjacent periods (a gap of at most one day) that share
-- the same limit, per bp_id, using row pattern matching.
--
-- A match is zero or more "continued" rows followed by one "last_row".
-- A row is "continued" when the NEXT row has the same limit and starts no
-- later than one day after the latest valid_to seen so far in the match.
-- The check looks ahead with NEXT() on purpose: an aggregate inside DEFINE is
-- a running aggregate that already includes the row being tested, so testing
-- the current row's own valid_from against MAX(valid_to) can never detect a
-- gap.
with pretab (bp_id, valid_from, valid_to, limit) as (
  select 1, DATE '1999-05-15', DATE '2020-01-01', 100 from dual union all
  select 1, DATE '2000-01-01', DATE '2000-01-10', 100 from dual union all
  select 1, DATE '2000-01-10', DATE '9999-12-31', 100 from dual union all
  select 2, DATE '2000-01-01', DATE '2001-12-31', 100 from dual union all
  select 2, DATE '2002-01-01', DATE '2002-12-31', 100 from dual union all
  select 2, DATE '2003-01-01', DATE '2003-12-31', 200 from dual union all
  select 2, DATE '2004-01-01', DATE '2006-12-31', 100 from dual union all
  select 2, DATE '2005-01-01', DATE '2005-12-31', 100 from dual
)
SELECT *
FROM   pretab
MATCH_RECOGNIZE(
  PARTITION BY bp_id
  ORDER BY valid_from, valid_to
  MEASURES
    FIRST(limit)      AS limit,
    FIRST(valid_from) AS valid_from,
    MAX(valid_to)     AS valid_to
  -- last_row has no DEFINE entry, so it matches any row and closes the group
  PATTERN (continued* last_row)
  DEFINE
    continued AS
          NEXT(limit) = limit
      AND NEXT(valid_from) - INTERVAL '1' DAY <= MAX(valid_to)
)
其输出为:

BP_ID | LIMIT | VALID_FROM | VALID_TO |
---|---|---|---|
1 | 100 | 1999-05-15 00:00:00 | 9999-12-31 00:00:00 |
2 | 100 | 2000-01-01 00:00:00 | 2002-12-31 00:00:00 |
2 | 200 | 2003-01-01 00:00:00 | 2003-12-31 00:00:00 |
2 | 100 | 2004-01-01 00:00:00 | 2006-12-31 00:00:00 |
db<>fiddle here
同样,如果您的数据没有间隔、所有区间都首尾相接,您可以尝试“会话化”:添加一个会话 ID,每当 limit 发生变化时就递增(我把该列重命名为 lim,因为 limit 可能是保留字)。
首先,使用 OLAP 函数 LAG() 添加一个计数器:当 lim 与上一行的 lim 不同时,计数器取 1,否则取 0。
其次,在外层查询中对该计数器做累计求和(running sum),由此得到会话 ID。
最后,从带有会话 ID 的查询中选出最小的起始日期和最大的截止日期,并按 bp_id、lim 和 session_id 分组。
-- Sessionisation: number the runs of consecutive rows (ordered by valid_from
-- within a bp_id) that share the same lim, then collapse each run to one row.
WITH indata (bp_id, valid_from, valid_to, lim) AS (
    SELECT 1, DATE '1999-05-15', DATE '2020-01-01', 100 FROM dual UNION ALL
    SELECT 1, DATE '2020-01-01', DATE '2020-01-10', 100 FROM dual UNION ALL
    SELECT 1, DATE '2020-01-10', DATE '2020-05-15', 100 FROM dual UNION ALL
    SELECT 1, DATE '2020-05-15', DATE '2021-01-01', 200 FROM dual UNION ALL
    SELECT 1, DATE '2021-01-01', DATE '2021-01-10', 200 FROM dual UNION ALL
    SELECT 1, DATE '2021-01-10', DATE '9999-12-31', 200 FROM dual
),
-- changed = 1 on each row whose lim differs from the previous row's lim.
-- The first row of a bp_id has no previous row: LAG returns NULL, the
-- comparison is not true, and the row gets 0.
lim_changes AS (
    SELECT
        src.bp_id,
        src.valid_from,
        src.valid_to,
        src.lim,
        CASE
            WHEN LAG(src.lim) OVER (PARTITION BY src.bp_id ORDER BY src.valid_from) <> src.lim
                THEN 1
            ELSE 0
        END AS changed
    FROM indata src
),
-- The running total of the change flags is the same for all rows of one run.
sessions AS (
    SELECT
        chg.bp_id,
        chg.valid_from,
        chg.valid_to,
        chg.lim,
        SUM(chg.changed) OVER (PARTITION BY chg.bp_id ORDER BY chg.valid_from) AS session_id
    FROM lim_changes chg
)
SELECT
    s.bp_id,
    MIN(s.valid_from) AS valid_from,
    MAX(s.valid_to) AS valid_to,
    s.lim
FROM sessions s
GROUP BY
    s.session_id,
    s.lim,
    s.bp_id;
-- out bp_id | valid_from | valid_to | lim
-- out -------+------------+------------+-----
-- out 1 | 1999-05-15 | 2020-05-15 | 100
-- out 1 | 2020-05-15 | 9999-12-31 | 200
正确的 CONNECT BY 版本可能如下所示:
-- Merge periods that touch (previous valid_to and next valid_from fall on the
-- same day, same bp_id and limit) into one row per chain.
--
-- For the sample rows below the result is:
--   1 | 1999-05-15 | 9999-12-31 | 100
--   2 | 1999-05-15 | 9999-12-31 | 200
with pretab (bp_id, valid_from, valid_to, limit) as (
  select 1, date '1999-05-15', date '2000-01-01', 100 from dual union all
  select 1, date '2000-01-01', date '2000-01-10', 100 from dual union all
  select 1, date '2000-01-10', date '9999-12-31', 100 from dual union all
  select 2, date '1999-05-15', date '2002-01-01', 200 from dual union all
  select 2, date '2002-01-01', date '2002-01-10', 200 from dual union all
  select 2, date '2002-01-10', date '9999-12-31', 200 from dual
),
-- start_dummy = 1 marks the first period of a chain: the previous period (by
-- valid_from) does not end on the day this one starts, or has another limit.
-- The LAG defaults (valid_from - 1, limit - 1) never equal the current values,
-- so the first row of each bp_id is always a chain start. TRUNC is applied
-- here because the CONNECT BY condition below compares truncated dates too.
pretab2 as (
  select t1.*,
         case
           when trunc(valid_from) != trunc(lag(valid_to, 1, valid_from - 1) over (partition by bp_id order by valid_from))
             or limit != lag(limit, 1, limit - 1) over (partition by bp_id order by valid_from)
           then 1
           else 0
         end as start_dummy
  from pretab t1
)
select bp_id, valid_from, valid_to, limit
from (
  select bp_id,
         connect_by_root(valid_from) as valid_from,
         valid_to,
         limit,
         connect_by_isleaf as is_leaf
  from pretab2
  connect by prior bp_id = bp_id
         and prior trunc(valid_to) = trunc(valid_from)
         and prior limit = limit
  start with start_dummy = 1
)
-- Keep the last period of every chain. Picking only the deepest row per bp_id
-- would drop all but one chain when a bp_id has several separate chains.
where is_leaf = 1
order by bp_id, valid_from;
Returns
BP_ID VALID_FROM VALID_TO LIMIT
1 1999-05-15 00:00:00 9999-12-31 00:00:00 100
2 1999-05-15 00:00:00 9999-12-31 00:00:00 200
我这里有一个这样的数据集:
BP_ID | VALID_FROM | VALID_TO | LIMIT |
---|---|---|---|
1 | 15.05.1999 | 16.07.2000 | 100 |
1 | 01.01.2020 | 10.01.2020 | 100 |
1 | 10.01.2020 | 31.12.9999 | 100 |
我想把这个 BP_ID 下首尾相接的记录“聚合”成单条记录:
BP_ID | VALID_FROM | VALID_TO | LIMIT |
---|---|---|---|
1 | 15.05.1999 | 16.07.2000 | 100 |
1 | 01.01.2020 | 31.12.9999 | 100 |
我认为实现此目的的一个好主意是使用分层查询并找到根 valid_from。所以我尝试了:
-- Sample data: three validity periods for bp_id 1, all with limit 100.
-- The first period ends on 16.07.2000; the second period's valid_to
-- (10.01.2020) equals the third period's valid_from.
with pretab as (
select 1 as bp_id,
to_date('15.05.1999', 'dd.mm.yyyy') as valid_from,
to_date('16.07.2000', 'dd.mm.yyyy') as valid_to,
100 as limit
from dual
union
select 1 as bp_id,
to_date('01.01.2020', 'dd.mm.yyyy') as valid_from,
to_date('10.01.2020', 'dd.mm.yyyy') as valid_to,
100 as limit
from dual
union
select 1 as bp_id,
to_date('10.01.2020', 'dd.mm.yyyy') as valid_from,
to_date('31.12.9999', 'dd.mm.yyyy') as valid_to,
100 as limit
from dual
),
-- pretab2 adds start_dummy, the flag consumed by START WITH below.
-- NOTE(review): the flag compares each row with the NEXT row (LEAD), so it is
-- 1 on rows whose successor does not continue them, not on rows that lack a
-- predecessor. With the sample rows above it flags the first and the third
-- period, but not the period starting 01.01.2020.
pretab2 as (
select t1.*, case when
-- true when the next period (ordered by valid_from) does not start at this
-- row's valid_to; a row without a successor is compared with 31.12.9999
valid_to != coalesce(lead(valid_from) over (partition by bp_id order by valid_from), to_date('31.12.9999', 'dd.mm.yyyy'))
-- true when the next period has a different limit; a row without a successor
-- is compared with 0, so any non-zero limit counts as different
or limit != coalesce(lead(limit) over (partition by bp_id order by valid_from), 0)
then 1
else 0 end as start_dummy from pretab t1
)
-- Walk the hierarchy from every flagged row to rows with the same bp_id and
-- limit whose valid_from falls on the same day as the parent's valid_to, and
-- report the valid_from of the row each walk started at.
select bp_id, connect_by_root(valid_from), valid_to, limit from pretab2
CONNECT BY PRIOR bp_id = bp_id
and prior trunc(valid_to) = trunc(valid_from)
and prior limit = limit
start with start_dummy = 1;
不幸的是,connect_by_root() 在第二行没有返回 01.01.2020。这是为什么?我应该如何修改查询才能得到这个值?
您的 start_dummy 计算似乎没有标记出查询应当作为起点的那些行。
如果您想使用 CONNECT BY,下面这个示例可以返回您期望的结果:
-- Merge periods that touch (previous valid_to and next valid_from fall on the
-- same day, same bp_id and limit) into one row per chain.
--
-- The walk starts only at rows that have no predecessor and keeps only the
-- last row of each chain, so every chain of every bp_id yields exactly one
-- row. Ordering by LEVEL and fetching a single row would instead return one
-- row for the whole result set, whatever the number of chains.
--
-- For the sample rows below the result is:
--   1 | 15.05.1999 | 01.01.2020 | 100   (no period starts on 01.01.2020)
--   1 | 01.01.2000 | 31.12.9999 | 100
WITH
    pretab (bp_id, valid_from, valid_to, LIMIT)
    AS
        (SELECT 1,
                TO_DATE ('15.05.1999', 'dd.mm.yyyy'),
                TO_DATE ('01.01.2020', 'dd.mm.yyyy'),
                100
           FROM DUAL
         UNION ALL
         SELECT 1,
                TO_DATE ('01.01.2000', 'dd.mm.yyyy'),
                TO_DATE ('10.01.2000', 'dd.mm.yyyy'),
                100
           FROM DUAL
         UNION ALL
         SELECT 1,
                TO_DATE ('10.01.2000', 'dd.mm.yyyy'),
                TO_DATE ('31.12.9999', 'dd.mm.yyyy'),
                100
           FROM DUAL)
    SELECT p.bp_id,
           CONNECT_BY_ROOT (p.valid_from) AS valid_from,
           p.valid_to,
           p.LIMIT
      FROM pretab p
     -- keep only the final period of each chain; its valid_to closes the chain
     WHERE CONNECT_BY_ISLEAF = 1
-- a chain starts at a period that no other period leads into
START WITH NOT EXISTS
               (SELECT 1
                  FROM pretab prev
                 WHERE     prev.bp_id = p.bp_id
                       AND prev.LIMIT = p.LIMIT
                       AND TRUNC (prev.valid_to) = TRUNC (p.valid_from))
CONNECT BY     PRIOR p.bp_id = p.bp_id
           AND PRIOR TRUNC (p.valid_to) = TRUNC (p.valid_from)
           AND PRIOR p.LIMIT = p.LIMIT
  -- valid_to is unambiguous here (valid_from is also a select-list alias)
  ORDER BY p.bp_id, p.valid_to;
另一个可行的解决方案是使用 GROUP BY。只有在您确信日期之间不存在任何间隔时,这种做法才有效。
-- Collapse every bp_id / limit pair into a single row spanning its earliest
-- valid_from to its latest valid_to. Gaps between periods are not detected:
-- all rows of a group are merged regardless of whether they touch.
with pretab (bp_id, valid_from, valid_to, limit) as (
    select 1, date '1999-05-15', date '2020-01-01', 100 from dual
    union
    select 1, date '2000-01-01', date '2000-01-10', 100 from dual
    union
    select 1, date '2000-01-10', date '9999-12-31', 100 from dual
)
select
    bp_id,
    min (valid_from),
    max (valid_to),
    limit
from pretab
group by
    limit,
    bp_id;
从 Oracle 12 开始,您可以使用 MATCH_RECOGNIZE 进行逐行的模式匹配处理:
-- Merge overlapping or adjacent periods (a gap of at most one day) that share
-- the same limit, per bp_id, using row pattern matching.
--
-- A match is zero or more "continued" rows followed by one "last_row".
-- A row is "continued" when the NEXT row has the same limit and starts no
-- later than one day after the latest valid_to seen so far in the match.
-- The check looks ahead with NEXT() on purpose: an aggregate inside DEFINE is
-- a running aggregate that already includes the row being tested, so testing
-- the current row's own valid_from against MAX(valid_to) can never detect a
-- gap.
with pretab (bp_id, valid_from, valid_to, limit) as (
  select 1, DATE '1999-05-15', DATE '2020-01-01', 100 from dual union all
  select 1, DATE '2000-01-01', DATE '2000-01-10', 100 from dual union all
  select 1, DATE '2000-01-10', DATE '9999-12-31', 100 from dual union all
  select 2, DATE '2000-01-01', DATE '2001-12-31', 100 from dual union all
  select 2, DATE '2002-01-01', DATE '2002-12-31', 100 from dual union all
  select 2, DATE '2003-01-01', DATE '2003-12-31', 200 from dual union all
  select 2, DATE '2004-01-01', DATE '2006-12-31', 100 from dual union all
  select 2, DATE '2005-01-01', DATE '2005-12-31', 100 from dual
)
SELECT *
FROM   pretab
MATCH_RECOGNIZE(
  PARTITION BY bp_id
  ORDER BY valid_from, valid_to
  MEASURES
    FIRST(limit)      AS limit,
    FIRST(valid_from) AS valid_from,
    MAX(valid_to)     AS valid_to
  -- last_row has no DEFINE entry, so it matches any row and closes the group
  PATTERN (continued* last_row)
  DEFINE
    continued AS
          NEXT(limit) = limit
      AND NEXT(valid_from) - INTERVAL '1' DAY <= MAX(valid_to)
)
其输出为:

BP_ID | LIMIT | VALID_FROM | VALID_TO |
---|---|---|---|
1 | 100 | 1999-05-15 00:00:00 | 9999-12-31 00:00:00 |
2 | 100 | 2000-01-01 00:00:00 | 2002-12-31 00:00:00 |
2 | 200 | 2003-01-01 00:00:00 | 2003-12-31 00:00:00 |
2 | 100 | 2004-01-01 00:00:00 | 2006-12-31 00:00:00 |
db<>fiddle here
同样,如果您的数据没有间隔、所有区间都首尾相接,您可以尝试“会话化”:添加一个会话 ID,每当 limit 发生变化时就递增(我把该列重命名为 lim,因为 limit 可能是保留字)。
首先,使用 OLAP 函数 LAG() 添加一个计数器:当 lim 与上一行的 lim 不同时,计数器取 1,否则取 0。
其次,在外层查询中对该计数器做累计求和(running sum),由此得到会话 ID。
最后,从带有会话 ID 的查询中选出最小的起始日期和最大的截止日期,并按 bp_id、lim 和 session_id 分组。
-- Sessionisation: number the runs of consecutive rows (ordered by valid_from
-- within a bp_id) that share the same lim, then collapse each run to one row.
WITH indata (bp_id, valid_from, valid_to, lim) AS (
    SELECT 1, DATE '1999-05-15', DATE '2020-01-01', 100 FROM dual UNION ALL
    SELECT 1, DATE '2020-01-01', DATE '2020-01-10', 100 FROM dual UNION ALL
    SELECT 1, DATE '2020-01-10', DATE '2020-05-15', 100 FROM dual UNION ALL
    SELECT 1, DATE '2020-05-15', DATE '2021-01-01', 200 FROM dual UNION ALL
    SELECT 1, DATE '2021-01-01', DATE '2021-01-10', 200 FROM dual UNION ALL
    SELECT 1, DATE '2021-01-10', DATE '9999-12-31', 200 FROM dual
),
-- changed = 1 on each row whose lim differs from the previous row's lim.
-- The first row of a bp_id has no previous row: LAG returns NULL, the
-- comparison is not true, and the row gets 0.
lim_changes AS (
    SELECT
        src.bp_id,
        src.valid_from,
        src.valid_to,
        src.lim,
        CASE
            WHEN LAG(src.lim) OVER (PARTITION BY src.bp_id ORDER BY src.valid_from) <> src.lim
                THEN 1
            ELSE 0
        END AS changed
    FROM indata src
),
-- The running total of the change flags is the same for all rows of one run.
sessions AS (
    SELECT
        chg.bp_id,
        chg.valid_from,
        chg.valid_to,
        chg.lim,
        SUM(chg.changed) OVER (PARTITION BY chg.bp_id ORDER BY chg.valid_from) AS session_id
    FROM lim_changes chg
)
SELECT
    s.bp_id,
    MIN(s.valid_from) AS valid_from,
    MAX(s.valid_to) AS valid_to,
    s.lim
FROM sessions s
GROUP BY
    s.session_id,
    s.lim,
    s.bp_id;
-- out bp_id | valid_from | valid_to | lim
-- out -------+------------+------------+-----
-- out 1 | 1999-05-15 | 2020-05-15 | 100
-- out 1 | 2020-05-15 | 9999-12-31 | 200
正确的 CONNECT BY 版本可能如下所示:
-- Merge periods that touch (previous valid_to and next valid_from fall on the
-- same day, same bp_id and limit) into one row per chain.
--
-- For the sample rows below the result is:
--   1 | 1999-05-15 | 9999-12-31 | 100
--   2 | 1999-05-15 | 9999-12-31 | 200
with pretab (bp_id, valid_from, valid_to, limit) as (
  select 1, date '1999-05-15', date '2000-01-01', 100 from dual union all
  select 1, date '2000-01-01', date '2000-01-10', 100 from dual union all
  select 1, date '2000-01-10', date '9999-12-31', 100 from dual union all
  select 2, date '1999-05-15', date '2002-01-01', 200 from dual union all
  select 2, date '2002-01-01', date '2002-01-10', 200 from dual union all
  select 2, date '2002-01-10', date '9999-12-31', 200 from dual
),
-- start_dummy = 1 marks the first period of a chain: the previous period (by
-- valid_from) does not end on the day this one starts, or has another limit.
-- The LAG defaults (valid_from - 1, limit - 1) never equal the current values,
-- so the first row of each bp_id is always a chain start. TRUNC is applied
-- here because the CONNECT BY condition below compares truncated dates too.
pretab2 as (
  select t1.*,
         case
           when trunc(valid_from) != trunc(lag(valid_to, 1, valid_from - 1) over (partition by bp_id order by valid_from))
             or limit != lag(limit, 1, limit - 1) over (partition by bp_id order by valid_from)
           then 1
           else 0
         end as start_dummy
  from pretab t1
)
select bp_id, valid_from, valid_to, limit
from (
  select bp_id,
         connect_by_root(valid_from) as valid_from,
         valid_to,
         limit,
         connect_by_isleaf as is_leaf
  from pretab2
  connect by prior bp_id = bp_id
         and prior trunc(valid_to) = trunc(valid_from)
         and prior limit = limit
  start with start_dummy = 1
)
-- Keep the last period of every chain. Picking only the deepest row per bp_id
-- would drop all but one chain when a bp_id has several separate chains.
where is_leaf = 1
order by bp_id, valid_from;
Returns
BP_ID VALID_FROM VALID_TO LIMIT
1 1999-05-15 00:00:00 9999-12-31 00:00:00 100
2 1999-05-15 00:00:00 9999-12-31 00:00:00 200