SQL:从重叠日期推导出有效定价
SQL: Deduce Effective Pricing from overlapping Dates
我有一批日期范围相互重叠的定价记录。有些日期会同时落在多个价格区间内。请看下面的示例:
例如 2022/02/15 有 2 个价格 10 和 8。
article
price
startdate
enddate
123
10
2022/02/02
2049/12/31
123
8
2022/02/14
2022/09/14
123
5
2022/03/14
2022/04/06
123
4
2022/04/11
2022/04/27
我想对如下日期范围应用有效价格,避免输出中出现价格冲突。
article
price
startdate
enddate
123
10
2022/02/02
2022/02/13
123
8
2022/02/14
2022/03/13
123
5
2022/03/14
2022/04/06
123
8
2022/04/07
2022/04/10
123
4
2022/04/11
2022/04/27
123
8
2022/04/28
2022/09/14
123
10
2022/09/15
2049/12/31
我能想到用窗口函数(window function)来调整结束日期和价格,但没能得出完整的解决方案。欢迎任何建议或解决方案。
数据库:Snowflake
谢谢
采用的逻辑是:在重叠的区间内,开始日期最新的价格优先。
日期版本(起止日期均包含在内):
-- Effective price per article over non-overlapping, inclusive date ranges.
-- Overlap rule: among the price ranges covering a segment, the one with the
-- latest startdate wins.
with data(article, price, startdate, enddate) as (
    -- Sample input: price ranges that overlap each other.
    select *
    from values
        (123, 10, '2022-02-02'::date, '2049-12-31'::date),
        (123, 8, '2022-02-14'::date, '2022-09-14'::date),
        (123, 5, '2022-03-14'::date, '2022-04-06'::date),
        (123, 4, '2022-04-11'::date, '2022-04-27'::date)
),
dis_times as (
    -- Boundaries where the effective price can change: every startdate and
    -- the day after every enddate. Consecutive boundaries form one segment,
    -- which ends the day before the next boundary.
    select
        article,
        date as startdate,
        lead(date) over (partition by article order by date) - 1 as enddate
    from (
        -- union already removes duplicate boundaries
        select article, startdate as date from data
        union
        select article, enddate + 1 as date from data
    )
    -- the last boundary has no successor, so it opens no segment
    qualify enddate is not null
)
select
    d1.article,
    d1.price,
    d2.startdate as s_startdate,
    d2.enddate as s_enddate
from data as d1
join dis_times as d2
    on d1.article = d2.article
    -- keep every price range that is active on the segment's first day
    and d2.startdate between d1.startdate and d1.enddate
-- one row per segment: the covering range that started most recently
qualify row_number() over (
    partition by d1.article, s_startdate
    order by d1.startdate desc
) = 1
order by d1.article, s_startdate;
给出:
ARTICLE
PRICE
S_STARTDATE
S_ENDDATE
123
10
2022-02-02
2022-02-13
123
8
2022-02-14
2022-03-13
123
5
2022-03-14
2022-04-06
123
8
2022-04-07
2022-04-10
123
4
2022-04-11
2022-04-27
123
8
2022-04-28
2022-09-14
123
10
2022-09-15
2049-12-31
连续时间戳版本:
-- Effective price per article over contiguous, half-open ranges
-- [s_startdate, s_enddate): each segment ends exactly where the next begins.
-- Overlap rule: among the price ranges covering a segment, the one with the
-- latest startdate wins.
with data(article, price, startdate, enddate) as (
    -- Sample input: price ranges that overlap each other.
    select *
    from values
        (123, 10, '2022-02-02'::date, '2049-12-31'::date),
        (123, 8, '2022-02-14'::date, '2022-09-14'::date),
        (123, 5, '2022-03-14'::date, '2022-04-06'::date),
        (123, 4, '2022-04-11'::date, '2022-04-27'::date)
),
dis_times as (
    -- Boundaries where the effective price can change: every startdate and
    -- every enddate. Consecutive boundaries form one segment.
    select
        article,
        date as startdate,
        lead(date) over (partition by article order by date) as enddate
    from (
        -- union already removes duplicate boundaries
        select article, startdate as date from data
        union
        select article, enddate as date from data
    )
    -- the last boundary has no successor, so it opens no segment
    qualify enddate is not null
)
select
    d1.article,
    d1.price,
    d2.startdate as s_startdate,
    d2.enddate as s_enddate
from data as d1
join dis_times as d2
    on d1.article = d2.article
    -- half-open match: the segment start lies in [d1.startdate, d1.enddate)
    and d2.startdate >= d1.startdate
    and d2.startdate < d1.enddate
-- one row per segment: the covering range that started most recently
qualify row_number() over (
    partition by d1.article, s_startdate
    order by d1.startdate desc
) = 1
order by d1.article, s_startdate;
给出:
ARTICLE
PRICE
S_STARTDATE
S_ENDDATE
123
10
2022-02-02
2022-02-14
123
8
2022-02-14
2022-03-14
123
5
2022-03-14
2022-04-06
123
8
2022-04-06
2022-04-11
123
4
2022-04-11
2022-04-27
123
8
2022-04-27
2022-09-14
123
10
2022-09-14
2049-12-31
感谢 MatBailie 提供更紧密的连接建议。
join dis_times as d2
on d1.article = d2.article
and d2.startdate between d1.startdate and d1.enddate
对于连续范围,我通常会这样写:
and d2.startdate between d1.startdate and d1.enddate and d2.startdate < d1.enddate
而不是这种形式
and d2.startdate >= d1.startdate and d2.startdate < d1.enddate
因为根据我的经验,这种写法性能更好。不过请务必针对你自己的复杂查询进行测试。
我做的第一件事,是把你按日期范围存储的价格数据转换成一张按日期查价格的查找表(每个日期一行)。
-- Expand every price range in my_table into one row per (article, day) and
-- keep a single price for each day.
create or replace temporary table price_date_lookup as
select distinct
    article,
    -- index - 1 is used as the day offset from start_date
    dateadd('day', day_seq.index - 1, start_date) as dates,
    -- overlapping ranges: the price of the range with the lowest end_date
    -- is taken for the day
    first_value(price) over (
        partition by article, dates
        order by end_date
    ) as price
from my_table,
    -- row generator: a string of dots as long as the range is wide, split on
    -- '.' so that each resulting piece stands for one day of the range
    lateral split_to_table(
        repeat('.', datediff(day, start_date, end_date)),
        '.'
    ) day_seq;
备注:
first_value 按结束日期排序后取第一个价格,以此处理重叠:同一天有多个价格时,采用结束日期最早的那个区间的价格。
lateral ... 这部分用来生成一个日期列,其中包含范围内的所有日期。
建好这张表之后,剩下的部分就可以当作 gaps and islands(间隙与孤岛)问题来处理。
-- Collapse the per-day price lookup back into date ranges (gaps and islands):
-- consecutive days with an unchanged price form one island.
with flagged as (
    select
        *,
        -- 1 on the first day of a new price run, 0 while the price repeats
        case
            when lag(price) over (partition by article order by dates) = price then 0
            else 1
        end as is_island_start
    from price_date_lookup
),
islands as (
    select
        *,
        -- running count of island starts gives each island its own id
        sum(is_island_start) over (partition by article order by dates) as island_id
    from flagged
)
select
    article,
    price,
    min(dates) as start_date,
    max(dates) as end_date
from islands
group by
    article,
    price,
    island_id;
我有一批日期范围相互重叠的定价记录。有些日期会同时落在多个价格区间内。请看下面的示例:
例如 2022/02/15 有 2 个价格 10 和 8。
article | price | startdate | enddate |
---|---|---|---|
123 | 10 | 2022/02/02 | 2049/12/31 |
123 | 8 | 2022/02/14 | 2022/09/14 |
123 | 5 | 2022/03/14 | 2022/04/06 |
123 | 4 | 2022/04/11 | 2022/04/27 |
我想对如下日期范围应用有效价格,避免输出中出现价格冲突。
article | price | startdate | enddate |
---|---|---|---|
123 | 10 | 2022/02/02 | 2022/02/13 |
123 | 8 | 2022/02/14 | 2022/03/13 |
123 | 5 | 2022/03/14 | 2022/04/06 |
123 | 8 | 2022/04/07 | 2022/04/10 |
123 | 4 | 2022/04/11 | 2022/04/27 |
123 | 8 | 2022/04/28 | 2022/09/14 |
123 | 10 | 2022/09/15 | 2049/12/31 |
我能想到用窗口函数(window function)来调整结束日期和价格,但没能得出完整的解决方案。欢迎任何建议或解决方案。
数据库:Snowflake
谢谢
采用的逻辑是:在重叠的区间内,开始日期最新的价格优先。
日期版本(起止日期均包含在内):
-- Effective price per article over non-overlapping, inclusive date ranges.
-- Overlap rule: among the price ranges covering a segment, the one with the
-- latest startdate wins.
with data(article, price, startdate, enddate) as (
    -- Sample input: price ranges that overlap each other.
    select *
    from values
        (123, 10, '2022-02-02'::date, '2049-12-31'::date),
        (123, 8, '2022-02-14'::date, '2022-09-14'::date),
        (123, 5, '2022-03-14'::date, '2022-04-06'::date),
        (123, 4, '2022-04-11'::date, '2022-04-27'::date)
),
dis_times as (
    -- Boundaries where the effective price can change: every startdate and
    -- the day after every enddate. Consecutive boundaries form one segment,
    -- which ends the day before the next boundary.
    select
        article,
        date as startdate,
        lead(date) over (partition by article order by date) - 1 as enddate
    from (
        -- union already removes duplicate boundaries
        select article, startdate as date from data
        union
        select article, enddate + 1 as date from data
    )
    -- the last boundary has no successor, so it opens no segment
    qualify enddate is not null
)
select
    d1.article,
    d1.price,
    d2.startdate as s_startdate,
    d2.enddate as s_enddate
from data as d1
join dis_times as d2
    on d1.article = d2.article
    -- keep every price range that is active on the segment's first day
    and d2.startdate between d1.startdate and d1.enddate
-- one row per segment: the covering range that started most recently
qualify row_number() over (
    partition by d1.article, s_startdate
    order by d1.startdate desc
) = 1
order by d1.article, s_startdate;
给出:
ARTICLE | PRICE | S_STARTDATE | S_ENDDATE |
---|---|---|---|
123 | 10 | 2022-02-02 | 2022-02-13 |
123 | 8 | 2022-02-14 | 2022-03-13 |
123 | 5 | 2022-03-14 | 2022-04-06 |
123 | 8 | 2022-04-07 | 2022-04-10 |
123 | 4 | 2022-04-11 | 2022-04-27 |
123 | 8 | 2022-04-28 | 2022-09-14 |
123 | 10 | 2022-09-15 | 2049-12-31 |
连续时间戳版本:
-- Effective price per article over contiguous, half-open ranges
-- [s_startdate, s_enddate): each segment ends exactly where the next begins.
-- Overlap rule: among the price ranges covering a segment, the one with the
-- latest startdate wins.
with data(article, price, startdate, enddate) as (
    -- Sample input: price ranges that overlap each other.
    select *
    from values
        (123, 10, '2022-02-02'::date, '2049-12-31'::date),
        (123, 8, '2022-02-14'::date, '2022-09-14'::date),
        (123, 5, '2022-03-14'::date, '2022-04-06'::date),
        (123, 4, '2022-04-11'::date, '2022-04-27'::date)
),
dis_times as (
    -- Boundaries where the effective price can change: every startdate and
    -- every enddate. Consecutive boundaries form one segment.
    select
        article,
        date as startdate,
        lead(date) over (partition by article order by date) as enddate
    from (
        -- union already removes duplicate boundaries
        select article, startdate as date from data
        union
        select article, enddate as date from data
    )
    -- the last boundary has no successor, so it opens no segment
    qualify enddate is not null
)
select
    d1.article,
    d1.price,
    d2.startdate as s_startdate,
    d2.enddate as s_enddate
from data as d1
join dis_times as d2
    on d1.article = d2.article
    -- half-open match: the segment start lies in [d1.startdate, d1.enddate)
    and d2.startdate >= d1.startdate
    and d2.startdate < d1.enddate
-- one row per segment: the covering range that started most recently
qualify row_number() over (
    partition by d1.article, s_startdate
    order by d1.startdate desc
) = 1
order by d1.article, s_startdate;
给出:
ARTICLE | PRICE | S_STARTDATE | S_ENDDATE |
---|---|---|---|
123 | 10 | 2022-02-02 | 2022-02-14 |
123 | 8 | 2022-02-14 | 2022-03-14 |
123 | 5 | 2022-03-14 | 2022-04-06 |
123 | 8 | 2022-04-06 | 2022-04-11 |
123 | 4 | 2022-04-11 | 2022-04-27 |
123 | 8 | 2022-04-27 | 2022-09-14 |
123 | 10 | 2022-09-14 | 2049-12-31 |
感谢 MatBailie 提供更紧密的连接建议。
join dis_times as d2
on d1.article = d2.article
and d2.startdate between d1.startdate and d1.enddate
对于连续范围,我通常会这样写:
and d2.startdate between d1.startdate and d1.enddate and d2.startdate < d1.enddate
而不是这种形式
and d2.startdate >= d1.startdate and d2.startdate < d1.enddate
因为根据我的经验,这种写法性能更好。不过请务必针对你自己的复杂查询进行测试。
我做的第一件事,是把你按日期范围存储的价格数据转换成一张按日期查价格的查找表(每个日期一行)。
-- Expand every price range in my_table into one row per (article, day) and
-- keep a single price for each day.
create or replace temporary table price_date_lookup as
select distinct
    article,
    -- index - 1 is used as the day offset from start_date
    dateadd('day', day_seq.index - 1, start_date) as dates,
    -- overlapping ranges: the price of the range with the lowest end_date
    -- is taken for the day
    first_value(price) over (
        partition by article, dates
        order by end_date
    ) as price
from my_table,
    -- row generator: a string of dots as long as the range is wide, split on
    -- '.' so that each resulting piece stands for one day of the range
    lateral split_to_table(
        repeat('.', datediff(day, start_date, end_date)),
        '.'
    ) day_seq;
备注:
first_value 按结束日期排序后取第一个价格,以此处理重叠:同一天有多个价格时,采用结束日期最早的那个区间的价格。
lateral ... 这部分用来生成一个日期列,其中包含范围内的所有日期。
建好这张表之后,剩下的部分就可以当作 gaps and islands(间隙与孤岛)问题来处理。
-- Collapse the per-day price lookup back into date ranges (gaps and islands):
-- consecutive days with an unchanged price form one island.
with flagged as (
    select
        *,
        -- 1 on the first day of a new price run, 0 while the price repeats
        case
            when lag(price) over (partition by article order by dates) = price then 0
            else 1
        end as is_island_start
    from price_date_lookup
),
islands as (
    select
        *,
        -- running count of island starts gives each island its own id
        sum(is_island_start) over (partition by article order by dates) as island_id
    from flagged
)
select
    article,
    price,
    min(dates) as start_date,
    max(dates) as end_date
from islands
group by
    article,
    price,
    island_id;