根据日期范围找到至少 2 个连续的项目
find at least 2 consecutive items based on date ranges
类似的问题已有很多解决方案,但它们都只基于单个日期列。
我想知道是否有更好的解决方案。下面附上我自己的解法,但我觉得它有点复杂;如果您知道更好的方法,请发帖分享。
下面是订单表 orders,其中包含 2 种项目(item)的开始日期和结束日期。
我想按项目找出日期首尾相接(后一行的开始日期 = 前一行的结束日期 + 1 天)的行,并且只输出至少连续 2 行的记录。
ITEM , START , END
1. A, 01.01.2020, 31.01.2020
2. A, 01.02.2020, 31.03.2020
3. B, 01.02.2020, 30.04.2020
4. A, 01.05.2020, 30.06.2020
5. B, 01.06.2020, 31.07.2020
6. B, 01.09.2020, 30.09.2020
7. A, 01.08.2020, 31.10.2020
8. B, 01.10.2020, 31.10.2020
9. B, 01.11.2020, 31.12.2020
项目 A 的输出应为第 1、2 行,项目 B 的输出应为第 6、8、9 行。
这是我的解法:
-- Return every orders row that belongs to a run of at least two back-to-back
-- date ranges of the same item. A row continues a run when its start_date is
-- exactly one day after the previous row's end_date.
WITH with_prev AS (
    -- Attach the previous end_date per item. The first row of an item has no
    -- predecessor, so it falls back to start_date - 1 and is treated as
    -- "continuing" (it does not open a new run number).
    SELECT
        item,
        start_date,
        end_date,
        NVL(
            LAG(end_date, 1) OVER (PARTITION BY item ORDER BY end_date),
            start_date - 1
        ) AS prev_end_date
    FROM orders
),
with_run AS (
    -- Running count of breaks: rows sharing a run_id form one unbroken chain.
    -- Note: this window is ordered by start_date, while LAG above is ordered
    -- by end_date.
    SELECT
        item,
        start_date,
        end_date,
        SUM(CASE WHEN prev_end_date + 1 = start_date THEN 0 ELSE 1 END)
            OVER (PARTITION BY item ORDER BY start_date) AS run_id
    FROM with_prev
),
with_size AS (
    -- Number of rows in each (item, run_id) chain.
    SELECT
        item,
        start_date,
        end_date,
        run_id,
        COUNT(run_id) OVER (PARTITION BY item, run_id) AS run_size
    FROM with_run
)
SELECT
    item,
    start_date,
    end_date
FROM with_size
WHERE run_size >= 2
;
嗯……可以使用 lag() 和 lead() 查看上一行/下一行的值,并检查它们是否首尾相接:
-- Return every orders row that directly touches a neighbouring row of the same
-- item: it starts the day after the previous range ends, or it ends the day
-- before the next range starts. Neighbours are taken in start_date order.
-- The result carries all orders columns plus the helper columns prev_end and
-- next_start.
select o.*
from (
    select
        src.*,
        lag(src.end_date) over (partition by src.item order by src.start_date) as prev_end,
        lead(src.start_date) over (partition by src.item order by src.start_date) as next_start
    from orders src
) o
-- The first/last row of an item has a NULL neighbour, which makes that
-- comparison UNKNOWN; such a row can only qualify through its other neighbour.
where o.start_date = o.prev_end + interval '1' day
   or o.end_date = o.next_start - interval '1' day;
-- Create the test table and load the nine sample rows from the question.
-- id is the row number used in the question (1-9), so every id is unique.
create table order_overlap (id number, item varchar2(1), start_date date, end_date date);
insert into order_overlap (id, start_date, end_date, item) values (1, to_date('01.01.2020', 'dd.mm.yyyy'), to_date('31.01.2020', 'dd.mm.yyyy'), 'A');
insert into order_overlap (id, start_date, end_date, item) values (2, to_date('01.02.2020', 'dd.mm.yyyy'), to_date('31.03.2020', 'dd.mm.yyyy'), 'A');
insert into order_overlap (id, start_date, end_date, item) values (3, to_date('01.02.2020', 'dd.mm.yyyy'), to_date('30.04.2020', 'dd.mm.yyyy'), 'B');
insert into order_overlap (id, start_date, end_date, item) values (4, to_date('01.05.2020', 'dd.mm.yyyy'), to_date('30.06.2020', 'dd.mm.yyyy'), 'A');
insert into order_overlap (id, start_date, end_date, item) values (5, to_date('01.06.2020', 'dd.mm.yyyy'), to_date('31.07.2020', 'dd.mm.yyyy'), 'B');
insert into order_overlap (id, start_date, end_date, item) values (6, to_date('01.09.2020', 'dd.mm.yyyy'), to_date('30.09.2020', 'dd.mm.yyyy'), 'B');
insert into order_overlap (id, start_date, end_date, item) values (7, to_date('01.08.2020', 'dd.mm.yyyy'), to_date('31.10.2020', 'dd.mm.yyyy'), 'A');
insert into order_overlap (id, start_date, end_date, item) values (8, to_date('01.10.2020', 'dd.mm.yyyy'), to_date('31.10.2020', 'dd.mm.yyyy'), 'B');
-- Ninth row of the question: its id is 9 (id 5 already belongs to the fifth row).
insert into order_overlap (id, start_date, end_date, item) values (9, to_date('01.11.2020', 'dd.mm.yyyy'), to_date('31.12.2020', 'dd.mm.yyyy'), 'B');
-- A slightly different take that may still be useful: consecutive rows are
-- merged into a single row. For example, if an item has
--   01.01.2020 - 31.01.2020
--   01.02.2020 - 28.02.2020
-- the result is one record
--   01.01.2020 - 28.02.2020
-- count(*) is the number of source rows merged into each output row; a row
-- with no adjacent neighbour is returned on its own with a count of 1.
with flagged as (
    select
        item,
        start_date,
        end_date,
        -- 1 marks the first row of a new run; 0 means the row starts exactly
        -- one day after the previous row's end_date. The first row of an item
        -- has a NULL predecessor and therefore falls into the ELSE branch.
        case
            when lag(end_date) over (partition by item order by start_date, end_date) + 1 = start_date
                then 0
            else 1
        end as is_run_start
    from order_overlap
),
runs as (
    select
        item,
        start_date,
        end_date,
        -- Running count of run starts gives each run its own number per item,
        -- so separate runs of the same item are never merged across a gap.
        sum(is_run_start) over (
            partition by item
            order by start_date, end_date
            rows unbounded preceding
        ) as run_no
    from flagged
)
select item, min(start_date) start_date, max(end_date) end_date, count(*)
from runs
group by item, run_no
order by item, min(start_date);
您可以直接使用 MATCH_RECOGNIZE 进行逐行比较,并且只返回与模式匹配的行组:
-- Per item, walk the rows in (start_date, end_date) order and keep only runs of
-- two or more rows in which each later row starts exactly one day after the
-- previous row's end_date. Every row of a matching run is returned.
select *
from table_name
match_recognize (
    partition by item
    order by start_date, end_date
    all rows per match
    -- run_head has no DEFINE entry, so it places no condition on the row that
    -- opens a run; run_tail+ requires at least one continuation row after it.
    pattern ( run_head run_tail+ )
    define
        run_tail as (
            prev( end_date ) + interval '1' day = run_tail.start_date
        )
)
因此,对于您的示例数据:
-- Sample data from the question: nine date ranges spread over items A and B.
create table table_name ( item, start_date, end_date ) as
    select 'A', date '2020-01-01', date '2020-01-31' from dual
    union all select 'A', date '2020-02-01', date '2020-03-31' from dual
    union all select 'B', date '2020-02-01', date '2020-04-30' from dual
    union all select 'A', date '2020-05-01', date '2020-06-30' from dual
    union all select 'B', date '2020-06-01', date '2020-07-31' from dual
    union all select 'B', date '2020-09-01', date '2020-09-30' from dual
    union all select 'A', date '2020-08-01', date '2020-10-31' from dual
    union all select 'B', date '2020-10-01', date '2020-10-31' from dual
    union all select 'B', date '2020-11-01', date '2020-12-31' from dual;
这输出:
ITEM | START_DATE | END_DATE
:--- | :--------- | :---------
A | 2020-01-01 | 2020-01-31
A | 2020-02-01 | 2020-03-31
B | 2020-09-01 | 2020-09-30
B | 2020-10-01 | 2020-10-31
B | 2020-11-01 | 2020-12-31
db<>fiddle here
类似的问题已有很多解决方案,但它们都只基于单个日期列。
我想知道是否有更好的解决方案。下面附上我自己的解法,但我觉得它有点复杂;如果您知道更好的方法,请发帖分享。
下面是订单表 orders,其中包含 2 种项目(item)的开始日期和结束日期。我想按项目找出日期首尾相接(后一行的开始日期 = 前一行的结束日期 + 1 天)的行,并且只输出至少连续 2 行的记录。
ITEM , START , END
1. A, 01.01.2020, 31.01.2020
2. A, 01.02.2020, 31.03.2020
3. B, 01.02.2020, 30.04.2020
4. A, 01.05.2020, 30.06.2020
5. B, 01.06.2020, 31.07.2020
6. B, 01.09.2020, 30.09.2020
7. A, 01.08.2020, 31.10.2020
8. B, 01.10.2020, 31.10.2020
9. B, 01.11.2020, 31.12.2020
项目 A 的输出应为第 1、2 行,项目 B 的输出应为第 6、8、9 行。
这是我的解法:
-- Return every orders row that belongs to a run of at least two back-to-back
-- date ranges of the same item. A row continues a run when its start_date is
-- exactly one day after the previous row's end_date.
WITH with_prev AS (
    -- Attach the previous end_date per item. The first row of an item has no
    -- predecessor, so it falls back to start_date - 1 and is treated as
    -- "continuing" (it does not open a new run number).
    SELECT
        item,
        start_date,
        end_date,
        NVL(
            LAG(end_date, 1) OVER (PARTITION BY item ORDER BY end_date),
            start_date - 1
        ) AS prev_end_date
    FROM orders
),
with_run AS (
    -- Running count of breaks: rows sharing a run_id form one unbroken chain.
    -- Note: this window is ordered by start_date, while LAG above is ordered
    -- by end_date.
    SELECT
        item,
        start_date,
        end_date,
        SUM(CASE WHEN prev_end_date + 1 = start_date THEN 0 ELSE 1 END)
            OVER (PARTITION BY item ORDER BY start_date) AS run_id
    FROM with_prev
),
with_size AS (
    -- Number of rows in each (item, run_id) chain.
    SELECT
        item,
        start_date,
        end_date,
        run_id,
        COUNT(run_id) OVER (PARTITION BY item, run_id) AS run_size
    FROM with_run
)
SELECT
    item,
    start_date,
    end_date
FROM with_size
WHERE run_size >= 2
;
嗯……可以使用 lag() 和 lead() 查看上一行/下一行的值,并检查它们是否首尾相接:
-- Return every orders row that directly touches a neighbouring row of the same
-- item: it starts the day after the previous range ends, or it ends the day
-- before the next range starts. Neighbours are taken in start_date order.
-- The result carries all orders columns plus the helper columns prev_end and
-- next_start.
select o.*
from (
    select
        src.*,
        lag(src.end_date) over (partition by src.item order by src.start_date) as prev_end,
        lead(src.start_date) over (partition by src.item order by src.start_date) as next_start
    from orders src
) o
-- The first/last row of an item has a NULL neighbour, which makes that
-- comparison UNKNOWN; such a row can only qualify through its other neighbour.
where o.start_date = o.prev_end + interval '1' day
   or o.end_date = o.next_start - interval '1' day;
-- Create the test table and load the nine sample rows from the question.
-- id is the row number used in the question (1-9), so every id is unique.
create table order_overlap (id number, item varchar2(1), start_date date, end_date date);
insert into order_overlap (id, start_date, end_date, item) values (1, to_date('01.01.2020', 'dd.mm.yyyy'), to_date('31.01.2020', 'dd.mm.yyyy'), 'A');
insert into order_overlap (id, start_date, end_date, item) values (2, to_date('01.02.2020', 'dd.mm.yyyy'), to_date('31.03.2020', 'dd.mm.yyyy'), 'A');
insert into order_overlap (id, start_date, end_date, item) values (3, to_date('01.02.2020', 'dd.mm.yyyy'), to_date('30.04.2020', 'dd.mm.yyyy'), 'B');
insert into order_overlap (id, start_date, end_date, item) values (4, to_date('01.05.2020', 'dd.mm.yyyy'), to_date('30.06.2020', 'dd.mm.yyyy'), 'A');
insert into order_overlap (id, start_date, end_date, item) values (5, to_date('01.06.2020', 'dd.mm.yyyy'), to_date('31.07.2020', 'dd.mm.yyyy'), 'B');
insert into order_overlap (id, start_date, end_date, item) values (6, to_date('01.09.2020', 'dd.mm.yyyy'), to_date('30.09.2020', 'dd.mm.yyyy'), 'B');
insert into order_overlap (id, start_date, end_date, item) values (7, to_date('01.08.2020', 'dd.mm.yyyy'), to_date('31.10.2020', 'dd.mm.yyyy'), 'A');
insert into order_overlap (id, start_date, end_date, item) values (8, to_date('01.10.2020', 'dd.mm.yyyy'), to_date('31.10.2020', 'dd.mm.yyyy'), 'B');
-- Ninth row of the question: its id is 9 (id 5 already belongs to the fifth row).
insert into order_overlap (id, start_date, end_date, item) values (9, to_date('01.11.2020', 'dd.mm.yyyy'), to_date('31.12.2020', 'dd.mm.yyyy'), 'B');
-- A slightly different take that may still be useful: consecutive rows are
-- merged into a single row. For example, if an item has
--   01.01.2020 - 31.01.2020
--   01.02.2020 - 28.02.2020
-- the result is one record
--   01.01.2020 - 28.02.2020
-- count(*) is the number of source rows merged into each output row; a row
-- with no adjacent neighbour is returned on its own with a count of 1.
with flagged as (
    select
        item,
        start_date,
        end_date,
        -- 1 marks the first row of a new run; 0 means the row starts exactly
        -- one day after the previous row's end_date. The first row of an item
        -- has a NULL predecessor and therefore falls into the ELSE branch.
        case
            when lag(end_date) over (partition by item order by start_date, end_date) + 1 = start_date
                then 0
            else 1
        end as is_run_start
    from order_overlap
),
runs as (
    select
        item,
        start_date,
        end_date,
        -- Running count of run starts gives each run its own number per item,
        -- so separate runs of the same item are never merged across a gap.
        sum(is_run_start) over (
            partition by item
            order by start_date, end_date
            rows unbounded preceding
        ) as run_no
    from flagged
)
select item, min(start_date) start_date, max(end_date) end_date, count(*)
from runs
group by item, run_no
order by item, min(start_date);
您可以直接使用 MATCH_RECOGNIZE 进行逐行比较,并且只返回与模式匹配的行组:
-- Per item, walk the rows in (start_date, end_date) order and keep only runs of
-- two or more rows in which each later row starts exactly one day after the
-- previous row's end_date. Every row of a matching run is returned.
select *
from table_name
match_recognize (
    partition by item
    order by start_date, end_date
    all rows per match
    -- run_head has no DEFINE entry, so it places no condition on the row that
    -- opens a run; run_tail+ requires at least one continuation row after it.
    pattern ( run_head run_tail+ )
    define
        run_tail as (
            prev( end_date ) + interval '1' day = run_tail.start_date
        )
)
因此,对于您的示例数据:
-- Sample data from the question: nine date ranges spread over items A and B.
create table table_name ( item, start_date, end_date ) as
    select 'A', date '2020-01-01', date '2020-01-31' from dual
    union all select 'A', date '2020-02-01', date '2020-03-31' from dual
    union all select 'B', date '2020-02-01', date '2020-04-30' from dual
    union all select 'A', date '2020-05-01', date '2020-06-30' from dual
    union all select 'B', date '2020-06-01', date '2020-07-31' from dual
    union all select 'B', date '2020-09-01', date '2020-09-30' from dual
    union all select 'A', date '2020-08-01', date '2020-10-31' from dual
    union all select 'B', date '2020-10-01', date '2020-10-31' from dual
    union all select 'B', date '2020-11-01', date '2020-12-31' from dual;
这输出:
ITEM | START_DATE | END_DATE
:--- | :--------- | :---------
A | 2020-01-01 | 2020-01-31
A | 2020-02-01 | 2020-03-31
B | 2020-09-01 | 2020-09-30
B | 2020-10-01 | 2020-10-31
B | 2020-11-01 | 2020-12-31
db<>fiddle here