SQL 按 Type2 SC 维度的月份汇总的状态总值
SQL Summary of Status total values by Month of Type2 SC Dimension
我有一张凭证状态历史记录表,它是一张类型 2 缓慢变化维度(SCD Type 2)表。我想得到截至某个特定日期为止、每个月每种状态的汇总金额。以下是我的表结构和插入数据的代码:
-- Sample SCD Type 2 history of voucher statuses: one row per voucher per
-- status period, bounded by [DateStatusStart] and [DateStatusEnd].
-- In the sample rows below the latest period of each voucher has a NULL end date.
CREATE TABLE #HDimVouchers(
    [HVoucherKey] [bigint] IDENTITY(1,1) NOT NULL,  -- generated automatically on insert
    [Voucher_id] [bigint] NOT NULL,
    [VoucherStatusKey] [int] NOT NULL,
    [Voucher_amt] [decimal](18, 2) NULL,
    [DateStatusStart] [date] NULL,
    [DateStatusEnd] [date] NULL
);
-- To re-run the script in the same session, drop the table first:
--drop table #HDimVouchers

-- The column list is spelled out so the seed rows do not depend on the
-- physical column order; the IDENTITY column [HVoucherKey] is omitted on purpose.
INSERT INTO #HDimVouchers
    ([Voucher_id], [VoucherStatusKey], [Voucher_amt], [DateStatusStart], [DateStatusEnd])
VALUES
    (10, 2,  10.00, '2019-01-01', '2019-02-15'),
    (10, 4,  10.00, '2019-02-16', NULL),
    (13, 4,  10.00, '2019-01-10', NULL),
    (11, 2,  15.00, '2019-01-01', NULL),
    (12, 2,  20.00, '2019-03-12', '2019-03-12'),
    (12, 4,  20.00, '2019-03-13', NULL),
    (15, 2, 205.00, '2019-05-25', '2020-04-24'),
    (15, 6, 205.00, '2020-04-25', NULL),
    (21, 2, 100.00, '2019-02-16', NULL);
我想得到按年月和 VoucherStatusKey 汇总的金额合计,如下所示:
[Year-Month] | [VoucherStatusKey] | [Amount] |
---|---|---|
201901 | 2 | 25 |
201901 | 4 | 10 |
201902 | 2 | 100 |
201902 | 4 | 10 |
201903 | 4 | 20 |
201905 | 2 | 205 |
201906 | 2 | 205 |
201907 | 2 | 205 |
201908 | 2 | 205 |
201909 | 2 | 205 |
201910 | 2 | 205 |
201911 | 2 | 205 |
201912 | 2 | 205 |
202001 | 2 | 205 |
202002 | 2 | 205 |
202003 | 2 | 205 |
我多次尝试得到上述结果,但始终无法得到正确的格式和数值。以下是我尝试过的查询:
-- Asker's attempt: labels each history row with the YYYYMM of its status
-- start date and computes a running total of the amount within each
-- (start year, start month, status) partition, ordered by start date.
-- A row is only ever placed in the month in which its status started.
SELECT
    CAST(YEAR([v].[DateStatusStart]) AS nvarchar(4))
        + RIGHT('00' + CAST(MONTH([v].[DateStatusStart]) AS nvarchar(2)), 2),
    [v].[VoucherStatusKey],
    SUM([v].[Voucher_amt]) OVER (
        PARTITION BY
            YEAR([v].[DateStatusStart]),
            MONTH([v].[DateStatusStart]),
            [v].[VoucherStatusKey]
        ORDER BY [v].[DateStatusStart]
    ) AS running_total
FROM #HDimVouchers AS [v]
WHERE [v].[DateStatusStart] < '2020-03-31';
假设您想要月末的值。那么,您可以采取以下做法:
- 为每张凭证生成所有适当的月份。
- 使用 `join` 引入相应的值。
对于第一部分,您可以使用数字表(tally table)或日历表(如果有的话)。不过,递归 CTE 也很方便:
-- Month-end snapshot: one row per voucher per month-end, carrying the amount
-- of the history row whose status period covers that month-end.
-- Reads the temp table #HDimVouchers (the table created by the setup script);
-- the unprefixed name HDimVouchers does not exist in that script.
with vdates as (
    -- Anchor: the end of the month in which each voucher first appears.
    select voucher_id, eomonth(min(DateStatusStart)) as eom
    from #HDimVouchers
    group by voucher_id
    union all
    -- Step: advance one month-end at a time; the last one generated is 2020-03-31.
    select voucher_id, eomonth(dateadd(month, 1, eom))
    from vdates
    where eom < '2020-03-01'
)
select vd.voucher_id, vd.eom, hv.Voucher_amt
from vdates vd
join #HDimVouchers hv
    on hv.voucher_id = vd.voucher_id
   and vd.eom >= hv.DateStatusStart
   -- a NULL end date is treated as an open-ended period
   and (vd.eom <= hv.DateStatusEnd or hv.DateStatusEnd is null)
order by vd.eom, vd.voucher_id;
这里是一个 db<>fiddle 示例。
我对此的看法是:
-- Monthly amount per status, built in four steps:
--   [dates]   : every YYYYMM from the earliest status start through 202112
--   [dimkeys] : per voucher, first and last YYYYMM of its history
--               (a NULL end date counts as 2999-12)
--   [map]     : per voucher and month, the start date of the history row to use
--   final     : join that row back in and sum the amounts per month and status
;WITH [dates] AS (
    SELECT YEAR(MIN([DateStatusStart])) * 100 + MONTH(MIN([DateStatusStart])) AS [YM]
    FROM #HDimVouchers
    UNION ALL
    SELECT
        CASE
            -- December rolls over to January of the next year (e.g. 201912 -> 202001)
            WHEN ([dates].[YM] % 100) = 12 THEN [dates].[YM] + 100 - 11
            ELSE [dates].[YM] + 1
        END
    FROM [dates]
    WHERE [YM] < 202112
),
[dimkeys] AS (
    SELECT
        [Voucher_id],
        YEAR(MIN([DateStatusStart])) * 100
            + MONTH(MIN([DateStatusStart])) AS [DateStatusStart],
        YEAR(MAX(ISNULL([DateStatusEnd], DATEFROMPARTS(2999, 12, 31)))) * 100
            + MONTH(MAX(ISNULL([DateStatusEnd], DATEFROMPARTS(2999, 12, 31)))) AS [DateStatusEnd]
    FROM [#HDimVouchers]
    GROUP BY [Voucher_id]
),
[map] AS (
    SELECT
        [dimkeys].[Voucher_id],
        [dates].[YM],
        COALESCE(
            -- latest status start inside this month, if any
            MAX([in_month].[DateStatusStart]),
            -- otherwise the latest status start in an earlier month
            (
                SELECT MAX([earlier].[DateStatusStart])
                FROM [#HDimVouchers] AS [earlier]
                WHERE [earlier].[Voucher_id] = [dimkeys].[Voucher_id]
                  AND YEAR([earlier].[DateStatusStart]) * 100 + MONTH([earlier].[DateStatusStart]) < [dates].[YM]
            ),
            -- last resort: the voucher's very first status start
            (
                SELECT MIN([first_row].[DateStatusStart])
                FROM [#HDimVouchers] AS [first_row]
                WHERE [first_row].[Voucher_id] = [dimkeys].[Voucher_id]
            )
        ) AS [MappingDate]
    FROM [dates]
    CROSS JOIN [dimkeys]
    LEFT JOIN [#HDimVouchers] AS [in_month]
        ON [in_month].[Voucher_id] = [dimkeys].[Voucher_id]
       AND YEAR([in_month].[DateStatusStart]) * 100 + MONTH([in_month].[DateStatusStart]) = [dates].[YM]
    -- keep only the months inside the voucher's lifetime
    WHERE [dates].[YM] >= [dimkeys].[DateStatusStart]
      AND [dates].[YM] <= [dimkeys].[DateStatusEnd]
    GROUP BY [dimkeys].[Voucher_id], [dates].[YM]
)
SELECT
    [map].[YM],
    [hist].[VoucherStatusKey],
    SUM([hist].[Voucher_amt]) AS [Sum]
FROM [map]
INNER JOIN [#HDimVouchers] AS [hist]
    ON [hist].[Voucher_id] = [map].[Voucher_id]
   AND [hist].[DateStatusStart] = [map].[MappingDate]
GROUP BY [map].[YM], [hist].[VoucherStatusKey]
ORDER BY [YM], [VoucherStatusKey];
所以:
- 获取从开始到结束的所有年月值
- 获取所有不同的键及其总体 min/max 日期(即成员存在时)
- 交叉加入他们以获得每个年月的每个键的条目(在成员的生命周期内)
- 为每个凭证和年月确定用于映射的日期:取当月内最晚的状态开始日期;若当月没有新的状态,则取此前最近一次的状态开始日期(这决定了在某个年月内发生状态变化的成员应计入哪个状态)
- 然后才将其与完整维度结合起来并按 SCD 类型 2 属性分组
更新
对于大表,您可以把查询拆分成多个临时表,而不是全部使用 CTE。这通常对性能有很大帮助。
-- Temp-table variant of the monthly-total-per-status query.

-- Step 1: copy the history and precompute the YYYYMM of each period's start
-- and end, so later steps can compare plain integers.
-- [YmEnd] is NULL when [DateStatusEnd] is NULL.
select *,
    YEAR([DateStatusStart])*100+MONTH([DateStatusStart]) [YmStart],
    YEAR([DateStatusEnd])*100+MONTH([DateStatusEnd]) [YmEnd]
into [#withYm]
from [#HDimVouchers];

-- Step 2: one row per voucher per month of its lifetime.
;with [dates] as (
    -- every YYYYMM from the earliest status start through 202112
    select MIN([YmStart]) [YM] from [#withYm]
    union all
    select
        case
            -- December rolls over to January of the next year
            when ([dates].[YM] % 100) = 12 then [dates].[YM] + 100 - 11
            else [dates].[YM] + 1
        end
    from [dates]
    where [YM] < 202112
), [dimkeys] as (
    -- first and last YYYYMM per voucher; a NULL end counts as 299912
    select
        [Voucher_id],
        MIN([YmStart]) [YmStart],
        MAX(ISNULL([YmEnd], 299912)) [YmEnd]
    from [#withYm]
    group by [Voucher_id]
)
select
    [dimkeys].[Voucher_id],
    [dates].[YM]
into [#all]
from [dates]
cross join [dimkeys]
where [dates].[YM] >= [dimkeys].[YmStart] and [dates].[YM] <= [dimkeys].[YmEnd];

-- Step 3: pick the history row to use for each voucher and month, then sum
-- the amounts per month and status.
;with [map] as (
    select
        [#all].[Voucher_id],
        [#all].[YM],
        ISNULL(
            -- latest status start inside this month, if any
            MAX([d].[DateStatusStart]),
            -- otherwise the latest status start in an earlier month
            (select MAX([i].[DateStatusStart]) from [#withYm] [i] where [i].[Voucher_id] = [#all].[Voucher_id] and [i].[YmStart] < [#all].[YM])
        ) [MappingDate]
    from [#all]
    -- join on the precomputed [YmStart] instead of recomputing
    -- YEAR*100+MONTH against #HDimVouchers for every candidate row
    left join [#withYm] [d]
        on [d].[Voucher_id] = [#all].[Voucher_id]
       and [d].[YmStart] = [#all].[YM]
    group by [#all].[Voucher_id], [#all].[YM]
)
select [map].[YM], [fact].[VoucherStatusKey], SUM([fact].[Voucher_amt]) [Sum]
from [map]
join [#HDimVouchers] [fact]
    on [fact].[Voucher_id] = [map].[Voucher_id]
   and [fact].[DateStatusStart] = [map].[MappingDate]
group by [map].[YM], [fact].[VoucherStatusKey]
order by [YM], [VoucherStatusKey];
我有一张凭证状态历史记录表,它是一张类型 2 缓慢变化维度(SCD Type 2)表。我想得到截至某个特定日期为止、每个月每种状态的汇总金额。以下是我的表结构和插入数据的代码:
-- Sample SCD Type 2 history of voucher statuses: one row per voucher per
-- status period, bounded by [DateStatusStart] and [DateStatusEnd].
-- In the sample rows below the latest period of each voucher has a NULL end date.
CREATE TABLE #HDimVouchers(
    [HVoucherKey] [bigint] IDENTITY(1,1) NOT NULL,  -- generated automatically on insert
    [Voucher_id] [bigint] NOT NULL,
    [VoucherStatusKey] [int] NOT NULL,
    [Voucher_amt] [decimal](18, 2) NULL,
    [DateStatusStart] [date] NULL,
    [DateStatusEnd] [date] NULL
);
-- To re-run the script in the same session, drop the table first:
--drop table #HDimVouchers

-- The column list is spelled out so the seed rows do not depend on the
-- physical column order; the IDENTITY column [HVoucherKey] is omitted on purpose.
INSERT INTO #HDimVouchers
    ([Voucher_id], [VoucherStatusKey], [Voucher_amt], [DateStatusStart], [DateStatusEnd])
VALUES
    (10, 2,  10.00, '2019-01-01', '2019-02-15'),
    (10, 4,  10.00, '2019-02-16', NULL),
    (13, 4,  10.00, '2019-01-10', NULL),
    (11, 2,  15.00, '2019-01-01', NULL),
    (12, 2,  20.00, '2019-03-12', '2019-03-12'),
    (12, 4,  20.00, '2019-03-13', NULL),
    (15, 2, 205.00, '2019-05-25', '2020-04-24'),
    (15, 6, 205.00, '2020-04-25', NULL),
    (21, 2, 100.00, '2019-02-16', NULL);
我想得到按年月和 VoucherStatusKey 汇总的金额合计,如下所示:
[Year-Month] | [VoucherStatusKey] | [Amount] |
---|---|---|
201901 | 2 | 25 |
201901 | 4 | 10 |
201902 | 2 | 100 |
201902 | 4 | 10 |
201903 | 4 | 20 |
201905 | 2 | 205 |
201906 | 2 | 205 |
201907 | 2 | 205 |
201908 | 2 | 205 |
201909 | 2 | 205 |
201910 | 2 | 205 |
201911 | 2 | 205 |
201912 | 2 | 205 |
202001 | 2 | 205 |
202002 | 2 | 205 |
202003 | 2 | 205 |
我多次尝试得到上述结果,但始终无法得到正确的格式和数值。以下是我尝试过的查询:
-- Asker's attempt: labels each history row with the YYYYMM of its status
-- start date and computes a running total of the amount within each
-- (start year, start month, status) partition, ordered by start date.
-- A row is only ever placed in the month in which its status started.
SELECT
    CAST(YEAR([v].[DateStatusStart]) AS nvarchar(4))
        + RIGHT('00' + CAST(MONTH([v].[DateStatusStart]) AS nvarchar(2)), 2),
    [v].[VoucherStatusKey],
    SUM([v].[Voucher_amt]) OVER (
        PARTITION BY
            YEAR([v].[DateStatusStart]),
            MONTH([v].[DateStatusStart]),
            [v].[VoucherStatusKey]
        ORDER BY [v].[DateStatusStart]
    ) AS running_total
FROM #HDimVouchers AS [v]
WHERE [v].[DateStatusStart] < '2020-03-31';
假设您想要月末的值。那么,您可以采取以下做法:
- 为每张凭证生成所有适当的月份。
- 使用 `join` 引入相应的值。
对于第一部分,您可以使用数字表(tally table)或日历表(如果有的话)。不过,递归 CTE 也很方便:
-- Month-end snapshot: one row per voucher per month-end, carrying the amount
-- of the history row whose status period covers that month-end.
-- Reads the temp table #HDimVouchers (the table created by the setup script);
-- the unprefixed name HDimVouchers does not exist in that script.
with vdates as (
    -- Anchor: the end of the month in which each voucher first appears.
    select voucher_id, eomonth(min(DateStatusStart)) as eom
    from #HDimVouchers
    group by voucher_id
    union all
    -- Step: advance one month-end at a time; the last one generated is 2020-03-31.
    select voucher_id, eomonth(dateadd(month, 1, eom))
    from vdates
    where eom < '2020-03-01'
)
select vd.voucher_id, vd.eom, hv.Voucher_amt
from vdates vd
join #HDimVouchers hv
    on hv.voucher_id = vd.voucher_id
   and vd.eom >= hv.DateStatusStart
   -- a NULL end date is treated as an open-ended period
   and (vd.eom <= hv.DateStatusEnd or hv.DateStatusEnd is null)
order by vd.eom, vd.voucher_id;
这里是一个 db<>fiddle 示例。
我对此的看法是:
-- Monthly amount per status, built in four steps:
--   [dates]   : every YYYYMM from the earliest status start through 202112
--   [dimkeys] : per voucher, first and last YYYYMM of its history
--               (a NULL end date counts as 2999-12)
--   [map]     : per voucher and month, the start date of the history row to use
--   final     : join that row back in and sum the amounts per month and status
;WITH [dates] AS (
    SELECT YEAR(MIN([DateStatusStart])) * 100 + MONTH(MIN([DateStatusStart])) AS [YM]
    FROM #HDimVouchers
    UNION ALL
    SELECT
        CASE
            -- December rolls over to January of the next year (e.g. 201912 -> 202001)
            WHEN ([dates].[YM] % 100) = 12 THEN [dates].[YM] + 100 - 11
            ELSE [dates].[YM] + 1
        END
    FROM [dates]
    WHERE [YM] < 202112
),
[dimkeys] AS (
    SELECT
        [Voucher_id],
        YEAR(MIN([DateStatusStart])) * 100
            + MONTH(MIN([DateStatusStart])) AS [DateStatusStart],
        YEAR(MAX(ISNULL([DateStatusEnd], DATEFROMPARTS(2999, 12, 31)))) * 100
            + MONTH(MAX(ISNULL([DateStatusEnd], DATEFROMPARTS(2999, 12, 31)))) AS [DateStatusEnd]
    FROM [#HDimVouchers]
    GROUP BY [Voucher_id]
),
[map] AS (
    SELECT
        [dimkeys].[Voucher_id],
        [dates].[YM],
        COALESCE(
            -- latest status start inside this month, if any
            MAX([in_month].[DateStatusStart]),
            -- otherwise the latest status start in an earlier month
            (
                SELECT MAX([earlier].[DateStatusStart])
                FROM [#HDimVouchers] AS [earlier]
                WHERE [earlier].[Voucher_id] = [dimkeys].[Voucher_id]
                  AND YEAR([earlier].[DateStatusStart]) * 100 + MONTH([earlier].[DateStatusStart]) < [dates].[YM]
            ),
            -- last resort: the voucher's very first status start
            (
                SELECT MIN([first_row].[DateStatusStart])
                FROM [#HDimVouchers] AS [first_row]
                WHERE [first_row].[Voucher_id] = [dimkeys].[Voucher_id]
            )
        ) AS [MappingDate]
    FROM [dates]
    CROSS JOIN [dimkeys]
    LEFT JOIN [#HDimVouchers] AS [in_month]
        ON [in_month].[Voucher_id] = [dimkeys].[Voucher_id]
       AND YEAR([in_month].[DateStatusStart]) * 100 + MONTH([in_month].[DateStatusStart]) = [dates].[YM]
    -- keep only the months inside the voucher's lifetime
    WHERE [dates].[YM] >= [dimkeys].[DateStatusStart]
      AND [dates].[YM] <= [dimkeys].[DateStatusEnd]
    GROUP BY [dimkeys].[Voucher_id], [dates].[YM]
)
SELECT
    [map].[YM],
    [hist].[VoucherStatusKey],
    SUM([hist].[Voucher_amt]) AS [Sum]
FROM [map]
INNER JOIN [#HDimVouchers] AS [hist]
    ON [hist].[Voucher_id] = [map].[Voucher_id]
   AND [hist].[DateStatusStart] = [map].[MappingDate]
GROUP BY [map].[YM], [hist].[VoucherStatusKey]
ORDER BY [YM], [VoucherStatusKey];
所以:
- 获取从开始到结束的所有年月值
- 获取所有不同的键及其总体 min/max 日期(即成员存在时)
- 交叉加入他们以获得每个年月的每个键的条目(在成员的生命周期内)
- 为每个凭证和年月确定用于映射的日期:取当月内最晚的状态开始日期;若当月没有新的状态,则取此前最近一次的状态开始日期(这决定了在某个年月内发生状态变化的成员应计入哪个状态)
- 然后才将其与完整维度结合起来并按 SCD 类型 2 属性分组
更新
对于大表,您可以把查询拆分成多个临时表,而不是全部使用 CTE。这通常对性能有很大帮助。
-- Temp-table variant of the monthly-total-per-status query.

-- Step 1: copy the history and precompute the YYYYMM of each period's start
-- and end, so later steps can compare plain integers.
-- [YmEnd] is NULL when [DateStatusEnd] is NULL.
select *,
    YEAR([DateStatusStart])*100+MONTH([DateStatusStart]) [YmStart],
    YEAR([DateStatusEnd])*100+MONTH([DateStatusEnd]) [YmEnd]
into [#withYm]
from [#HDimVouchers];

-- Step 2: one row per voucher per month of its lifetime.
;with [dates] as (
    -- every YYYYMM from the earliest status start through 202112
    select MIN([YmStart]) [YM] from [#withYm]
    union all
    select
        case
            -- December rolls over to January of the next year
            when ([dates].[YM] % 100) = 12 then [dates].[YM] + 100 - 11
            else [dates].[YM] + 1
        end
    from [dates]
    where [YM] < 202112
), [dimkeys] as (
    -- first and last YYYYMM per voucher; a NULL end counts as 299912
    select
        [Voucher_id],
        MIN([YmStart]) [YmStart],
        MAX(ISNULL([YmEnd], 299912)) [YmEnd]
    from [#withYm]
    group by [Voucher_id]
)
select
    [dimkeys].[Voucher_id],
    [dates].[YM]
into [#all]
from [dates]
cross join [dimkeys]
where [dates].[YM] >= [dimkeys].[YmStart] and [dates].[YM] <= [dimkeys].[YmEnd];

-- Step 3: pick the history row to use for each voucher and month, then sum
-- the amounts per month and status.
;with [map] as (
    select
        [#all].[Voucher_id],
        [#all].[YM],
        ISNULL(
            -- latest status start inside this month, if any
            MAX([d].[DateStatusStart]),
            -- otherwise the latest status start in an earlier month
            (select MAX([i].[DateStatusStart]) from [#withYm] [i] where [i].[Voucher_id] = [#all].[Voucher_id] and [i].[YmStart] < [#all].[YM])
        ) [MappingDate]
    from [#all]
    -- join on the precomputed [YmStart] instead of recomputing
    -- YEAR*100+MONTH against #HDimVouchers for every candidate row
    left join [#withYm] [d]
        on [d].[Voucher_id] = [#all].[Voucher_id]
       and [d].[YmStart] = [#all].[YM]
    group by [#all].[Voucher_id], [#all].[YM]
)
select [map].[YM], [fact].[VoucherStatusKey], SUM([fact].[Voucher_amt]) [Sum]
from [map]
join [#HDimVouchers] [fact]
    on [fact].[Voucher_id] = [map].[Voucher_id]
   and [fact].[DateStatusStart] = [map].[MappingDate]
group by [map].[YM], [fact].[VoucherStatusKey]
order by [YM], [VoucherStatusKey];