SQL:按月汇总类型 2 缓慢变化维度(SCD Type 2)中各状态的总金额

SQL Summary of Status total values by Month of Type2 SC Dimension

我有一张凭证状态历史记录表,它是一个类型 2 缓慢变化维度(SCD Type 2)表。我想获取在某个特定日期之前、每个月每种状态的汇总总金额。以下是我的表结构和插入数据的代码:

-- Voucher status history: one row per voucher per status period.
CREATE TABLE #HDimVouchers
(
    HVoucherKey      bigint         IDENTITY(1, 1) NOT NULL,  -- generated row key
    Voucher_id       bigint         NOT NULL,
    VoucherStatusKey int            NOT NULL,
    Voucher_amt      decimal(18, 2) NULL,
    DateStatusStart  date           NULL,  -- first day of the status period
    DateStatusEnd    date           NULL   -- last day of the status period; the queries below treat NULL as still open
);
-- To reset between runs: drop table #HDimVouchers
-- Sample rows. The column list is spelled out so the insert does not depend on
-- the table's column order; HVoucherKey is an IDENTITY column and is generated
-- by the engine, so it is intentionally absent from the list.
INSERT INTO #HDimVouchers
    (Voucher_id, VoucherStatusKey, Voucher_amt, DateStatusStart, DateStatusEnd)
VALUES
    (10, 2,  10.00, '2019-01-01', '2019-02-15'),
    (10, 4,  10.00, '2019-02-16', NULL),
    (13, 4,  10.00, '2019-01-10', NULL),
    (11, 2,  15.00, '2019-01-01', NULL),
    (12, 2,  20.00, '2019-03-12', '2019-03-12'),
    (12, 4,  20.00, '2019-03-13', NULL),
    (15, 2, 205.00, '2019-05-25', '2020-04-24'),
    (15, 6, 205.00, '2020-04-25', NULL),
    (21, 2, 100.00, '2019-02-16', NULL);

我想得到按年月和 VoucherStatusKey 分组的金额汇总,如下所示:

[Year-Month] [VoucherStatusKey] [Amount]
201901 2 25
201901 4 10
201902 2 100
201902 4 10
201903 4 20
201905 2 205
201906 2 205
201907 2 205
201908 2 205
201909 2 205
201910 2 205
201911 2 205
201912 2 205
202001 2 205
202002 2 205
202003 2 205

我多次尝试得到上述结果,但始终无法得到正确的格式和数值。以下是我尝试过的查询:

-- Attempted query (kept as posted).
-- Column 1: yyyymm label built from the year and zero-padded month of DateStatusStart.
-- Column 3: windowed running total of Voucher_amt within each
--           (start year, start month, VoucherStatusKey) partition, ordered by DateStatusStart.
-- There is no GROUP BY, so the result has one row per source row rather than one row
-- per month/status, and each row is attributed only to the month in which its status
-- period started; DateStatusEnd is never consulted.
-- Only rows whose status period started before 2020-03-31 are considered.
SELECT  convert(nvarchar(4),Year([DateStatusStart])) + RIGHT('00' + CONVERT(NVARCHAR(2), DATEPART(Month, [DateStatusStart])), 2)
,[VoucherStatusKey]
,SUM([Voucher_amt]) OVER (PARTITION BY Year([DateStatusStart]),Month([DateStatusStart]), [VoucherStatusKey] ORDER BY [DateStatusStart]) AS running_total 
FROM #HDimVouchers where [DateStatusStart] < '2020-03-31';

假设您想要月末的值。那么,您可以采取以下做法:

  • 为每张凭证生成所有适当的月份。
  • 使用 join 引入适当的值。

对于第一部分,您可以使用数字表(tally table)或日历表(如果有的话)。不过,递归 CTE 也很方便:

-- Returns one row per voucher per month-end, from the month of the voucher's
-- earliest status start through 2020-03-31, together with the amount taken
-- from the status row whose period covers that month-end.
-- Reads the temp table #HDimVouchers created by the setup script.
with vdates as (
      -- Anchor: month-end of each voucher's earliest DateStatusStart.
      select voucher_id, eomonth(min(DateStatusStart)) as eom
      from #HDimVouchers
      group by voucher_id
      union all
      -- Step: move to the next month-end while the current one is before
      -- 2020-03-01, so the last generated value is 2020-03-31.
      select voucher_id, eomonth(dateadd(month, 1, eom))
      from vdates
      where eom < '2020-03-01'
     )
select vd.voucher_id, vd.eom, hv.Voucher_amt
from vdates vd join
     #HDimVouchers hv
     on hv.voucher_id = vd.voucher_id and
        -- The status row is active at month-end; a NULL end date means still open.
        vd.eom >= hv.DateStatusStart and
        (vd.eom <= hv.DateStatusEnd or hv.DateStatusEnd is null)
order by vd.eom, vd.voucher_id;

这里是一个 db<>fiddle 示例。

我对此的看法是:

-- Monthly total of Voucher_amt per VoucherStatusKey.
-- For every month in a voucher's lifetime, one status row is chosen (the latest
-- one that started in that month, otherwise the latest one that started earlier)
-- and its amount is summed under that row's status.
;WITH [months] AS (
    -- Every yyyymm integer from the earliest DateStatusStart through 202112.
    SELECT YEAR(MIN([DateStatusStart])) * 100 + MONTH(MIN([DateStatusStart])) AS [YM]
    FROM #HDimVouchers
    UNION ALL
    -- December rolls over to January of the next year (+89), otherwise +1.
    SELECT [months].[YM] + CASE WHEN [months].[YM] % 100 = 12 THEN 89 ELSE 1 END
    FROM [months]
    WHERE [months].[YM] < 202112
),
[lifespan] AS (
    -- First and last yyyymm of each voucher; an open (NULL) end date counts as 2999-12.
    SELECT
        [v].[Voucher_id],
        YEAR(MIN([v].[DateStatusStart])) * 100
            + MONTH(MIN([v].[DateStatusStart])) AS [FirstYM],
        YEAR(MAX(ISNULL([v].[DateStatusEnd], DATEFROMPARTS(2999, 12, 31)))) * 100
            + MONTH(MAX(ISNULL([v].[DateStatusEnd], DATEFROMPARTS(2999, 12, 31)))) AS [LastYM]
    FROM [#HDimVouchers] AS [v]
    GROUP BY [v].[Voucher_id]
),
[version_per_month] AS (
    -- For each voucher and month, the DateStatusStart of the status row to report.
    SELECT
        [lifespan].[Voucher_id],
        [months].[YM],
        COALESCE(
            -- 1) latest status row that started within this month
            MAX([started].[DateStatusStart]),
            -- 2) otherwise the latest status row that started in an earlier month
            (
                SELECT MAX([earlier].[DateStatusStart])
                FROM [#HDimVouchers] AS [earlier]
                WHERE [earlier].[Voucher_id] = [lifespan].[Voucher_id]
                  AND YEAR([earlier].[DateStatusStart]) * 100 + MONTH([earlier].[DateStatusStart]) < [months].[YM]
            ),
            -- 3) otherwise the voucher's very first status row
            (
                SELECT MIN([firstrow].[DateStatusStart])
                FROM [#HDimVouchers] AS [firstrow]
                WHERE [firstrow].[Voucher_id] = [lifespan].[Voucher_id]
            )
        ) AS [MappingDate]
    FROM [months]
    CROSS JOIN [lifespan]
    LEFT JOIN [#HDimVouchers] AS [started]
        ON [started].[Voucher_id] = [lifespan].[Voucher_id]
       AND YEAR([started].[DateStatusStart]) * 100 + MONTH([started].[DateStatusStart]) = [months].[YM]
    WHERE [months].[YM] BETWEEN [lifespan].[FirstYM] AND [lifespan].[LastYM]
    GROUP BY
        [lifespan].[Voucher_id],
        [months].[YM]
)
SELECT
    [version_per_month].[YM],
    [fact].[VoucherStatusKey],
    SUM([fact].[Voucher_amt]) AS [Sum]
FROM [version_per_month]
INNER JOIN [#HDimVouchers] AS [fact]
    ON [fact].[Voucher_id] = [version_per_month].[Voucher_id]
   AND [fact].[DateStatusStart] = [version_per_month].[MappingDate]
GROUP BY
    [version_per_month].[YM],
    [fact].[VoucherStatusKey]
ORDER BY
    [YM],
    [VoucherStatusKey];

所以:

  • 获取从开始到结束的所有年月值
  • 获取所有不同的键及其总体 min/max 日期(即成员存在时)
  • 将两者交叉连接(cross join),以获得每个键在每个年月的条目(仅限成员的生命周期内)
  • 添加应该用于映射的日期(这用于决定将在年-月中更改的成员添加到哪个月份)
  • 然后才将其与完整维度结合起来并按 SCD 类型 2 属性分组

更新

对于大表,您可以把查询拆分成多个步骤并使用多个临时表,而不是全部写成 CTE。这通常对性能有很大帮助。

-- Materialize the dimension rows together with the yyyymm integer of their
-- start and end dates, so later steps can compare plain integers instead of
-- recomputing YEAR()*100+MONTH() for every row.
-- Columns are listed explicitly so #withYm keeps a known shape even if
-- #HDimVouchers later gains columns.
SELECT
    [HVoucherKey],
    [Voucher_id],
    [VoucherStatusKey],
    [Voucher_amt],
    [DateStatusStart],
    [DateStatusEnd],
    YEAR([DateStatusStart]) * 100 + MONTH([DateStatusStart]) AS [YmStart],
    YEAR([DateStatusEnd]) * 100 + MONTH([DateStatusEnd]) AS [YmEnd]  -- NULL when DateStatusEnd is NULL
INTO [#withYm]
FROM [#HDimVouchers];

-- Build #all: one (Voucher_id, YM) row for every month within each voucher's lifetime.
;WITH [months] AS (
    -- Every yyyymm integer from the earliest YmStart through 202112.
    SELECT MIN([YmStart]) AS [YM]
    FROM [#withYm]
    UNION ALL
    -- December rolls over to January of the next year (+89), otherwise +1.
    SELECT [months].[YM] + CASE WHEN [months].[YM] % 100 = 12 THEN 89 ELSE 1 END
    FROM [months]
    WHERE [months].[YM] < 202112
),
[lifespan] AS (
    -- First and last yyyymm of each voucher; a NULL YmEnd counts as 299912.
    SELECT
        [Voucher_id],
        MIN([YmStart]) AS [FirstYM],
        MAX(ISNULL([YmEnd], 299912)) AS [LastYM]
    FROM [#withYm]
    GROUP BY [Voucher_id]
)
SELECT
    [lifespan].[Voucher_id],
    [months].[YM]
INTO [#all]
FROM [months]
CROSS JOIN [lifespan]
WHERE [months].[YM] BETWEEN [lifespan].[FirstYM] AND [lifespan].[LastYM]

-- Final step: for each (voucher, month) in #all pick the status row to report,
-- then total Voucher_amt per month and VoucherStatusKey.
;WITH [map] AS (
    SELECT
        [#all].[Voucher_id],
        [#all].[YM],
        ISNULL(
            -- latest status row that started within this month ...
            MAX([d].[DateStatusStart]),
            -- ... otherwise the latest status row that started in an earlier month
            (
                SELECT MAX([i].[DateStatusStart])
                FROM [#withYm] AS [i]
                WHERE [i].[Voucher_id] = [#all].[Voucher_id]
                  AND [i].[YmStart] < [#all].[YM]
            )
        ) AS [MappingDate]
    FROM [#all]
    -- Join on the precomputed YmStart in #withYm rather than recomputing
    -- YEAR()*100+MONTH() from #HDimVouchers for every candidate row.
    LEFT JOIN [#withYm] AS [d]
        ON [d].[Voucher_id] = [#all].[Voucher_id]
       AND [d].[YmStart] = [#all].[YM]
    GROUP BY
        [#all].[Voucher_id],
        [#all].[YM]
)
SELECT
    [map].[YM],
    [fact].[VoucherStatusKey],
    SUM([fact].[Voucher_amt]) AS [Sum]
FROM [map]
INNER JOIN [#HDimVouchers] AS [fact]
    ON [fact].[Voucher_id] = [map].[Voucher_id]
   AND [fact].[DateStatusStart] = [map].[MappingDate]
GROUP BY
    [map].[YM],
    [fact].[VoucherStatusKey]
ORDER BY
    [YM],
    [VoucherStatusKey];