具有开始和结束日期的差距和岛屿(有效期)
Gaps and Islands with start en end date (ValidPeriod)
我已搜索但找不到以下问题的解决方案。
我有几个价目表,有几百万行,我发现很多例子可以聚合成一行,因为组的开始和结束日期是连续的(结束日期:20151231 下一个开始日期:20160101)
但我也发现了很多差距,这意味着使用 min() 和 max() 函数的直接方法不适用,因为可能的差距将被忽略。
以下包含带有示例记录的#Prices table 和带有我正在拍摄的结果的#Target table:
谢谢。
我对间隔的定义是两个连续记录之间的间隔超过 1 天。
-- Rebuilds the #Prices sample table: one row per product price period.
-- ValidTo is NULL for a period that is still open.
-- 'U' is the documented OBJECT_ID type code for a user table, so the
-- drop-if-exists guard relies only on documented behaviour.
if object_id('tempdb..#Prices', 'U') is not null
    drop table #Prices;

create table #Prices (
    Product   varchar(50)   not null
  , Value     decimal(18,5) not null
  , ValidFrom date          not null
  , ValidTo   date          null      -- NULL = open-ended period
);

insert into #Prices (Product, Value, ValidFrom, ValidTo)
values
    -- 'Island A': back-to-back periods with no gap; the price changes along the way
    ('Island A', 10.10, '20140101', '20140606')
  , ('Island A', 10.10, '20140607', '20141010')
  , ('Island A', 10.11, '20141011', '20141231')
  , ('Island A', 11.10, '20150101', '20151231')
  , ('Island A', 10.10, '20160101', null)
    -- 'Gap B': constant price, nothing valid between 2014-10-11 and 2014-12-31
  , ('Gap B',    20.10, '20140101', '20140606')
  , ('Gap B',    20.10, '20140607', '20141010')
  , ('Gap B',    20.10, '20150101', '20151231')
  , ('Gap B',    20.10, '20160101', null);

-- Show the sample rows in period order per product.
select P.Product
     , P.Value
     , P.ValidFrom
     , P.ValidTo
from #Prices as P
order by P.Product, P.ValidFrom;
-- Rebuilds the #Target table holding the expected result of collapsing #Prices:
-- consecutive periods with the same Product and Value become one row, and a gap
-- of more than one day starts a new row.
-- 'U' is the documented OBJECT_ID type code for a user table, so the
-- drop-if-exists guard relies only on documented behaviour.
if object_id('tempdb..#Target', 'U') is not null
    drop table #Target;

create table #Target (
    Product   varchar(50)   not null
  , Value     decimal(18,5) not null
  , ValidFrom date          not null
  , ValidTo   date          null      -- NULL = open-ended period
);

insert into #Target (Product, Value, ValidFrom, ValidTo)
values
    -- 'Island A': the first two 10.10 periods merge; each price change stays separate
    ('Island A', 10.10, '20140101', '20141010')
  , ('Island A', 10.11, '20141011', '20141231')
  , ('Island A', 11.10, '20150101', '20151231')
  , ('Island A', 10.10, '20160101', null)
    -- 'Gap B': two islands, split by the gap after 2014-10-10
  , ('Gap B',    20.10, '20140101', '20141010')
  , ('Gap B',    20.10, '20150101', null);

-- Show the expected rows in period order per product.
select T.Product
     , T.Value
     , T.ValidFrom
     , T.ValidTo
from #Target as T
order by T.Product, T.ValidFrom;
编辑
我希望编辑是您问题的答案。连续的记录(记录之间最多 1 天)可以通过取 min(ValidFrom) 和 max(ValidTo) 来聚合。问题在于差距,这些将被忽略。 Product 'Gap B' 的结果将是一条记录。
即使日期在 Gap 期间,此记录上任何带有日期的命中都将获得值 20.10。
Gap B | 20.10 | 20140101 | null
因此我需要 2 条记录,这样 table 上的所有连接都会产生正确的值,而在 Gap
期间没有值
Gap B | 20.10 | 20140101 | 20141010
Gap B | 20.10 | 20151231 | null
必须为 NULL ValidTo 加入一些逻辑
-- Gaps-and-islands via a day-level tally table.
--   1. cte0 x4 yields 10 * 10 * 10 * 10 = 10,000 rows; cte1 turns them into consecutive
--      calendar days (R = ordinal, D = date) starting at the earliest ValidFrom in #Prices.
--   2. Each price row is expanded to one row per tally day it covers. Open-ended rows
--      (ValidTo NULL) use the 2099-12-31 sentinel, so they run to the end of the tally.
--   3. Within one Product/Value, R minus the running day count stays constant while the
--      days are consecutive and changes after a gap, so it identifies the island.
-- Limitation: only 10,000 days are generated; a price row that starts after that window
-- matches no tally day and drops out of the inner join.
;with cte0(N) as (
    select 1
    from (values (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) as N(N)
)
, cte1(R, D) as (
    select row_number() over (order by (select null))
         , dateadd(day, row_number() over (order by (select null)) - 1, (select min(ValidFrom) from #Prices))
    from cte0 as N1
    cross join cte0 as N2
    cross join cte0 as N3
    cross join cte0 as N4
)
select Product
     , Value
     , ValidFrom = min(ValidFrom)
       -- Map the open-ended sentinel back to NULL after aggregating.
     , ValidTo   = nullif(max(isnull(ValidTo, '2099-12-31')), '2099-12-31')
from (
    select P.Product
         , P.Value
         , P.ValidFrom
         , P.ValidTo
           -- Number the days by D, not by ValidFrom: every day of one price row shares the
           -- same ValidFrom, and ROW_NUMBER breaks ties in an unspecified order, which
           -- could put the numbering out of step with R and split an island.
         , Island = T.R - row_number() over (partition by P.Product, P.Value order by T.D)
    from #Prices as P
    inner join cte1 as T
        on T.D between P.ValidFrom and isnull(P.ValidTo, '2099-12-31')
) as A
group by Product, Value, Island
order by Product desc, min(ValidFrom);
Returns
Product Value ValidFrom ValidTo
Island A 10.10000 2014-01-01 2014-10-10
Island A 10.11000 2014-10-11 2014-12-31
Island A 11.10000 2015-01-01 2015-12-31
Island A 10.10000 2016-01-01 NULL
Gap B 20.10000 2014-01-01 2014-10-10
Gap B 20.10000 2015-01-01 NULL
这是一个使用递归 cte 的不同解决方案,我认为与 Jon 的相比更容易理解一些。在这个数据量上它也更有效,尽管你需要自己测试更大数据集的性能:
-- Gaps-and-islands via a recursive CTE that walks #Prices row by row.
--   rownum: numbers all rows by Product, then ValidFrom.
--   cte:    carries ValidFrom2, the start date of the island the current row belongs to.
--           A row continues the previous row's island only when Product and Value match
--           and it starts exactly one day after the previous row ends; otherwise it
--           starts a new island.
-- Open-ended periods (ValidTo NULL) are carried as the 2999-01-01 sentinel so MAX() can
-- see them, and are mapped back to NULL in the final select.
;with rownum as (
    select row_number() over (order by Product, ValidFrom) as rn
         , Product
         , Value
         , ValidFrom
         , ValidTo
    from #Prices
)
, cte as (
    -- Anchor: the first row starts the first island.
    select rn
         , Product
         , Value
         , ValidFrom
         , ValidFrom as ValidFrom2
         , isnull(ValidTo, '29990101') as ValidTo
    from rownum
    where rn = 1
    union all
    -- Step: compare row rn with row rn - 1.
    select r.rn
         , r.Product
         , r.Value
         , r.ValidFrom
           -- Propagate the island start (c.ValidFrom2), not the previous row's own
           -- ValidFrom, so islands of three or more rows keep their first date.
         , case when c.Product = r.Product
                 and c.Value = r.Value
                 and dateadd(day, 1, c.ValidTo) = r.ValidFrom
                then c.ValidFrom2
                else r.ValidFrom
           end as ValidFrom2
         , isnull(r.ValidTo, '29990101') as ValidTo
    from rownum as r
    inner join cte as c
        on r.rn = c.rn + 1
)
select Product
     , Value
     , ValidFrom2 as ValidFrom
     , nullif(max(ValidTo), '29990101') as ValidTo
from cte
group by Product
       , Value
       , ValidFrom2
order by Product
       , ValidFrom2
-- One recursion level per input row: lift the default limit of 100 levels.
-- rn strictly increases, so the recursion always terminates.
option (maxrecursion 0);
我已搜索但找不到以下问题的解决方案。
我有几个价目表,有几百万行,我发现很多例子可以聚合成一行,因为组的开始和结束日期是连续的(结束日期:20151231 下一个开始日期:20160101)
但我也发现了很多差距,这意味着使用 min() 和 max() 函数的直接方法不适用,因为可能的差距将被忽略。
以下包含带有示例记录的#Prices table 和带有我正在拍摄的结果的#Target table:
谢谢。
我对间隔的定义是两个连续记录之间的间隔超过 1 天。
-- Rebuilds the #Prices sample table: one row per product price period.
-- ValidTo is NULL for a period that is still open.
-- 'U' is the documented OBJECT_ID type code for a user table, so the
-- drop-if-exists guard relies only on documented behaviour.
if object_id('tempdb..#Prices', 'U') is not null
    drop table #Prices;

create table #Prices (
    Product   varchar(50)   not null
  , Value     decimal(18,5) not null
  , ValidFrom date          not null
  , ValidTo   date          null      -- NULL = open-ended period
);

insert into #Prices (Product, Value, ValidFrom, ValidTo)
values
    -- 'Island A': back-to-back periods with no gap; the price changes along the way
    ('Island A', 10.10, '20140101', '20140606')
  , ('Island A', 10.10, '20140607', '20141010')
  , ('Island A', 10.11, '20141011', '20141231')
  , ('Island A', 11.10, '20150101', '20151231')
  , ('Island A', 10.10, '20160101', null)
    -- 'Gap B': constant price, nothing valid between 2014-10-11 and 2014-12-31
  , ('Gap B',    20.10, '20140101', '20140606')
  , ('Gap B',    20.10, '20140607', '20141010')
  , ('Gap B',    20.10, '20150101', '20151231')
  , ('Gap B',    20.10, '20160101', null);

-- Show the sample rows in period order per product.
select P.Product
     , P.Value
     , P.ValidFrom
     , P.ValidTo
from #Prices as P
order by P.Product, P.ValidFrom;
-- Rebuilds the #Target table holding the expected result of collapsing #Prices:
-- consecutive periods with the same Product and Value become one row, and a gap
-- of more than one day starts a new row.
-- 'U' is the documented OBJECT_ID type code for a user table, so the
-- drop-if-exists guard relies only on documented behaviour.
if object_id('tempdb..#Target', 'U') is not null
    drop table #Target;

create table #Target (
    Product   varchar(50)   not null
  , Value     decimal(18,5) not null
  , ValidFrom date          not null
  , ValidTo   date          null      -- NULL = open-ended period
);

insert into #Target (Product, Value, ValidFrom, ValidTo)
values
    -- 'Island A': the first two 10.10 periods merge; each price change stays separate
    ('Island A', 10.10, '20140101', '20141010')
  , ('Island A', 10.11, '20141011', '20141231')
  , ('Island A', 11.10, '20150101', '20151231')
  , ('Island A', 10.10, '20160101', null)
    -- 'Gap B': two islands, split by the gap after 2014-10-10
  , ('Gap B',    20.10, '20140101', '20141010')
  , ('Gap B',    20.10, '20150101', null);

-- Show the expected rows in period order per product.
select T.Product
     , T.Value
     , T.ValidFrom
     , T.ValidTo
from #Target as T
order by T.Product, T.ValidFrom;
编辑 我希望编辑是您问题的答案。连续的记录(记录之间最多 1 天)可以通过取 min(ValidFrom) 和 max(ValidTo) 来聚合。问题在于差距,这些将被忽略。 Product 'Gap B' 的结果将是一条记录。 即使日期在 Gap 期间,此记录上任何带有日期的命中都将获得值 20.10。
Gap B | 20.10 | 20140101 | null
因此我需要 2 条记录,这样 table 上的所有连接都会产生正确的值,而在 Gap
期间没有值Gap B | 20.10 | 20140101 | 20141010
Gap B | 20.10 | 20151231 | null
必须为 NULL ValidTo 加入一些逻辑
-- Gaps-and-islands via a day-level tally table.
--   1. cte0 x4 yields 10 * 10 * 10 * 10 = 10,000 rows; cte1 turns them into consecutive
--      calendar days (R = ordinal, D = date) starting at the earliest ValidFrom in #Prices.
--   2. Each price row is expanded to one row per tally day it covers. Open-ended rows
--      (ValidTo NULL) use the 2099-12-31 sentinel, so they run to the end of the tally.
--   3. Within one Product/Value, R minus the running day count stays constant while the
--      days are consecutive and changes after a gap, so it identifies the island.
-- Limitation: only 10,000 days are generated; a price row that starts after that window
-- matches no tally day and drops out of the inner join.
;with cte0(N) as (
    select 1
    from (values (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) as N(N)
)
, cte1(R, D) as (
    select row_number() over (order by (select null))
         , dateadd(day, row_number() over (order by (select null)) - 1, (select min(ValidFrom) from #Prices))
    from cte0 as N1
    cross join cte0 as N2
    cross join cte0 as N3
    cross join cte0 as N4
)
select Product
     , Value
     , ValidFrom = min(ValidFrom)
       -- Map the open-ended sentinel back to NULL after aggregating.
     , ValidTo   = nullif(max(isnull(ValidTo, '2099-12-31')), '2099-12-31')
from (
    select P.Product
         , P.Value
         , P.ValidFrom
         , P.ValidTo
           -- Number the days by D, not by ValidFrom: every day of one price row shares the
           -- same ValidFrom, and ROW_NUMBER breaks ties in an unspecified order, which
           -- could put the numbering out of step with R and split an island.
         , Island = T.R - row_number() over (partition by P.Product, P.Value order by T.D)
    from #Prices as P
    inner join cte1 as T
        on T.D between P.ValidFrom and isnull(P.ValidTo, '2099-12-31')
) as A
group by Product, Value, Island
order by Product desc, min(ValidFrom);
Returns
Product Value ValidFrom ValidTo
Island A 10.10000 2014-01-01 2014-10-10
Island A 10.11000 2014-10-11 2014-12-31
Island A 11.10000 2015-01-01 2015-12-31
Island A 10.10000 2016-01-01 NULL
Gap B 20.10000 2014-01-01 2014-10-10
Gap B 20.10000 2015-01-01 NULL
这是一个使用递归 cte 的不同解决方案,我认为与 Jon 的相比更容易理解一些。在这个数据量上它也更有效,尽管你需要自己测试更大数据集的性能:
-- Gaps-and-islands via a recursive CTE that walks #Prices row by row.
--   rownum: numbers all rows by Product, then ValidFrom.
--   cte:    carries ValidFrom2, the start date of the island the current row belongs to.
--           A row continues the previous row's island only when Product and Value match
--           and it starts exactly one day after the previous row ends; otherwise it
--           starts a new island.
-- Open-ended periods (ValidTo NULL) are carried as the 2999-01-01 sentinel so MAX() can
-- see them, and are mapped back to NULL in the final select.
;with rownum as (
    select row_number() over (order by Product, ValidFrom) as rn
         , Product
         , Value
         , ValidFrom
         , ValidTo
    from #Prices
)
, cte as (
    -- Anchor: the first row starts the first island.
    select rn
         , Product
         , Value
         , ValidFrom
         , ValidFrom as ValidFrom2
         , isnull(ValidTo, '29990101') as ValidTo
    from rownum
    where rn = 1
    union all
    -- Step: compare row rn with row rn - 1.
    select r.rn
         , r.Product
         , r.Value
         , r.ValidFrom
           -- Propagate the island start (c.ValidFrom2), not the previous row's own
           -- ValidFrom, so islands of three or more rows keep their first date.
         , case when c.Product = r.Product
                 and c.Value = r.Value
                 and dateadd(day, 1, c.ValidTo) = r.ValidFrom
                then c.ValidFrom2
                else r.ValidFrom
           end as ValidFrom2
         , isnull(r.ValidTo, '29990101') as ValidTo
    from rownum as r
    inner join cte as c
        on r.rn = c.rn + 1
)
select Product
     , Value
     , ValidFrom2 as ValidFrom
     , nullif(max(ValidTo), '29990101') as ValidTo
from cte
group by Product
       , Value
       , ValidFrom2
order by Product
       , ValidFrom2
-- One recursion level per input row: lift the default limit of 100 levels.
-- rn strictly increases, so the recursion always terminates.
option (maxrecursion 0);