递归 CTE 来填充多个季度的最新可用数据?

Recursive CTE to populate latest available data across many quarters?

这是一个示例数据集:

CREATE TABLE UholdingsPE(
   Reference_Date DATE  NOT NULL PRIMARY KEY
  ,Investment     VARCHAR(14) NOT NULL
  ,Holding        VARCHAR(9) NOT NULL
  ,Price          INTEGER  NOT NULL
);
INSERT INTO UholdingsPE(Reference_Date,Investment,Holding,Price) VALUES ('3/31/2017','Example Fund 1','Apple',1);
INSERT INTO UholdingsPE(Reference_Date,Investment,Holding,Price) VALUES ('3/31/2017','Example Fund 1','Microsoft',1);
INSERT INTO UholdingsPE(Reference_Date,Investment,Holding,Price) VALUES ('6/30/2017','Example Fund 1','Apple',1);
INSERT INTO UholdingsPE(Reference_Date,Investment,Holding,Price) VALUES ('6/30/2017','Example Fund 1','Microsoft',1);
INSERT INTO UholdingsPE(Reference_Date,Investment,Holding,Price) VALUES ('9/30/2017','Example Fund 1','Apple',1);
INSERT INTO UholdingsPE(Reference_Date,Investment,Holding,Price) VALUES ('9/30/2017','Example Fund 1','Microsoft',1);
INSERT INTO UholdingsPE(Reference_Date,Investment,Holding,Price) VALUES ('12/31/2017','Example Fund 1','Apple',1);
INSERT INTO UholdingsPE(Reference_Date,Investment,Holding,Price) VALUES ('12/31/2017','Example Fund 1','Microsoft',1);
INSERT INTO UholdingsPE(Reference_Date,Investment,Holding,Price) VALUES ('6/30/2018','Example Fund 1','Apple',22);
INSERT INTO UholdingsPE(Reference_Date,Investment,Holding,Price) VALUES ('6/30/2018','Example Fund 1','Microsoft',22);
INSERT INTO UholdingsPE(Reference_Date,Investment,Holding,Price) VALUES ('12/31/2018','Example Fund 1','Apple',33);
INSERT INTO UholdingsPE(Reference_Date,Investment,Holding,Price) VALUES ('12/31/2018','Example Fund 1','Microsoft',33);
INSERT INTO UholdingsPE(Reference_Date,Investment,Holding,Price) VALUES ('6/30/2019','Example Fund 1','Apple',44);
INSERT INTO UholdingsPE(Reference_Date,Investment,Holding,Price) VALUES ('6/30/2019','Example Fund 1','Microsoft',44);
INSERT INTO UholdingsPE(Reference_Date,Investment,Holding,Price) VALUES ('12/31/2019','Example Fund 1','Apple',55);
INSERT INTO UholdingsPE(Reference_Date,Investment,Holding,Price) VALUES ('12/31/2019','Example Fund 1','Microsoft',55);

我们每季度从我们的共同基金中收到股份。它们按参考日期、投资和控股加载,使每一行都不同。此数据放在 MSFT SQL 服务器的数据库中。有时,基金不会提交该季度的持股情况。在这种情况下,我们希望提取最新的可用季度持股量(这可能是 3 或 4 个季度前),这样在对多个参考日期进行趋势分析时就不会出现差距。

例如,假设我们有一个基金有可用的 SQL 行,参考日期为 3/31/19、9/30/19 和 3/31/20(6/30/19 和 12 /31/19 丢失)。我想弄清楚如何用最后可用的季度数据来填补这些缺失的季度。因此,6/30/19 将是 3/31/19 的数据。 12/31/19 将使用 9/30/19 的数据。但是,如果某一季度某个基金的数据存在,我们将只使用该数据。这是我目前正在使用的查询,但它只返回一个季度并用上一季度的数据填充本季度的数据,如果当前季度的数据丢失:

DECLARE @MaxDate Date = '20180930'  --Typically this would be the max [Reference Date] grouped by Fund as we cannot pass parameters in our BI tool

;WITH Latest_Fund
AS (
        SELECT *,
            ROW_NUMBER() OVER (PARTITION BY Investment ORDER BY EOMONTH([Reference Date],0) DESC) AS rn  
        FROM UholdingsPE  
        WHERE EOMONTH([Reference Date],0) >= DATEADD("MONTH",-3,@MaxDate)  
            AND EOMONTH([Reference Date],0) <= @MaxDate  
)  
SELECT
F.[Reference Date], *  
FROM DBO.UholdingsPE Z  
INNER JOIN Latest_Fund F  
    ON (Z.Investment = F.Investment or Z.Investment = F.Investment)  
        AND EOMONTH(F.[Reference Date],0) = EOMONTH(Z.[Reference Date],0)  
WHERE F.rn = 1 

这是我希望看到的示例:

这是我正在使用的 table 的完整 DDL(请注意 [当前值] 与价格相同):


USE [EPM_Datamart]
GO

/****** Object:  Table [dbo].[UHoldingsPE]    Script Date: 6/30/2020 1:02:28 PM ******/
SET ANSI_NULLS ON
GO

SET QUOTED_IDENTIFIER ON
GO

CREATE TABLE [dbo].[UHoldingsPE](
    [Datekey] [int] NOT NULL,
    [Investmentkey] [int] NOT NULL,
    [Holdingskey] [int] NOT NULL,
    [Primekey] [decimal](38, 0) NOT NULL,
    [Dcfkey] [decimal](38, 0) NOT NULL,
    [Cakey] [decimal](38, 0) NOT NULL,
    [Data Source] [varchar](100) NULL,
    [Debt Maturity 1 Year] [float] NULL,
    [Debt Maturity 2 Years] [float] NULL,
    [Debt Maturity 3 Years] [float] NULL,
    [Debt Maturity 4 Years] [float] NULL,
    [Debt Maturity 5 Years] [float] NULL,
    [Debt Maturity After 5 Years] [float] NULL,
    [Book Value] [float] NULL,
    [Buyer] [varchar](1000) NULL,
    [Capital IQ ID] [int] NULL,
    [Cash] [float] NULL,
    [City] [varchar](1000) NULL,
    [Comments] [varchar](8000) NULL,
    [Committed Capital] [float] NULL,
    [Holding Value Adjustment] [float] NULL,
    [Country] [varchar](1000) NULL,
    [Covenant Basis] [varchar](1000) NULL,
    [Covenant Status] [varchar](1000) NULL,
    [Currency Type] [varchar](100) NULL,
    [Current Cost] [float] NULL,
    [Current Value] [float] NULL,
    [Debt Ownership Percentage] [float] NULL,
    [Debt Recoursed To Fund] [float] NULL,
    [Default On Payments] [varchar](1000) NULL,
    [Delist Date] [date] NULL,
    [Disclosure Restrictions (If Any)] [nvarchar](1000) NULL,
    [Dividends] [float] NULL,
    [EBITDA (LTM) Acquired] [float] NULL,
    [Enterprise Value Of M&A Transactions (Net)] [float] NULL,
    [Enterprise Value] [float] NULL,
    [Exchange] [varchar](1000) NULL,
    [Exit IRR] [float] NULL,
    [Exit Style] [varchar](1000) NULL,
    [Exit Year] [int] NULL,
    [Financial Year-End] [varchar](1000) NULL,
    [Fixed or Floating Rate] [float] NULL,
    [Fund Undiluted Ownership] [float] NULL,
    [GP Board Seat Holders] [varchar](1000) NULL,
    [GP Board Seats] [int] NULL,
    [Gross Debt] [float] NULL,
    [Gross IRR] [float] NULL,
    [Holdco Domicile] [varchar](1000) NULL,
    [Holding Currency] [varchar](100) NULL,
    [Holding Description] [nvarchar](4000) NULL,
    [Holding Name] [varchar](1000) NULL,
    [Holding Type] [varchar](1000) NULL,
    [Holding] [nvarchar](1000) NULL,
    [Industry] [varchar](1000) NULL,
    [Industry Group] [varchar](1000) NULL,
    [Initial Acquisition] [varchar](1000) NULL,
    [Initial Book Value] [float] NULL,
    [Initial Cash] [float] NULL,
    [Initial Co-Investor Amount Invested] [float] NULL,
    [Initial Comments] [varchar](1000) NULL,
    [Initial Control In Deal] [varchar](1000) NULL,
    [Initial Deal Source] [varchar](1000) NULL,
    [Initial Deal Type] [varchar](1000) NULL,
    [Initial Debt Ownership Percentage] [varchar](1000) NULL,
    [Initial Enterprise Value] [float] NULL,
    [Total Equity Value] [float] NULL,
    [Initial Gross Debt] [float] NULL,
    [Initial Investment Date] [date] NULL,
    [Initial Investment Ownership] [float] NULL,
    [Initial Investment] [float] NULL,
    [Initial LP Co-Investors?] [varchar](1000) NULL,
    [Initial LP Co-Investors] [varchar](1000) NULL,
    [Initial LTM CAPEX] [float] NULL,
    [Initial LTM EBITDA] [float] NULL,
    [Initial LTM Gross Profit] [float] NULL,
    [Initial LTM Interest Expense] [float] NULL,
    [Initial LTM Net Income] [float] NULL,
    [Initial LTM Revenue] [float] NULL,
    [Initial Main Instrument] [varchar](1000) NULL,
    [Initial Management Holding Ownership] [float] NULL,
    [Initial Management Ownership] [float] NULL,
    [Initial Multiple Type] [varchar](1000) NULL,
    [Initial Multiple] [float] NULL,
    [Initial Net Debt] [float] NULL,
    [Initial Number Of Employees] [int] NULL,
    [Initial Other Valuation Adjustment] [float] NULL,
    [Initial Total Equity Value] [float] NULL,
    [Initial Valuation Method] [varchar](1000) NULL,
    [Initial Valuation Support Amount] [float] NULL,
    [Initial Value Driver Date] [date] NULL,
    [Initial Value Driver Type] [varchar](1000) NULL,
    [Initial Vehicle Ownership] [float] NULL,
    [Interests Dividends Other Income] [float] NULL,
    [Interests] [float] NULL,
    [Invested Capital] [float] NULL,
    [Investment Ownership] [float] NULL,
    [Investment Status] [varchar](1000) NULL,
    [Investment] [nvarchar](1000) NULL,
    [IPO Date] [date] NULL,
    [IPO Share Price] [float] NULL,
    [Last Realization Date] [date] NULL,
    [Last Updated] [datetime] NOT NULL,
    [Last Valuation Date] [date] NULL,
    [Line Of Credit Outstanding?] [float] NULL,
    [Lock-Up Period End Date] [date] NULL,
    [LTM CAPEX] [float] NULL,
    [LTM EBITDA] [float] NULL,
    [LTM Gross Profit] [float] NULL,
    [LTM Interest Expense] [float] NULL,
    [LTM Net Income] [float] NULL,
    [LTM Revenue] [float] NULL,
    [M&A Activity Notes] [nvarchar](1000) NULL,
    [Main Instrument] [varchar](1000) NULL,
    [Management Holding Ownership] [float] NULL,
    [Management Ownership] [float] NULL,
    [Max Multiple] [float] NULL,
    [Method Of Exit] [varchar](1000) NULL,
    [Min Multiple] [float] NULL,
    [Multiple Type] [varchar](1000) NULL,
    [Multiple] [float] NULL,
    [Net Debt] [float] NULL,
    [Number Of Employees] [int] NULL,
    [Number Of M&A Transactions (Net)] [float] NULL,
    [Number Of Shares] [float] NULL,
    [Operating Data Date 1] [date] NULL,
    [Operating Data Date 2] [date] NULL,
    [Operating Data Date 3] [date] NULL,
    [Operating Data Type 1] [nvarchar](1000) NULL,
    [Operating Data Type 2] [nvarchar](1000) NULL,
    [Operating Data Type 3] [nvarchar](1000) NULL,
    [Other Income] [float] NULL,
    [Period End Date For LTM] [date] NULL,
    [PIK Toggle] [varchar](1000) NULL,
    [Position Comments] [varchar](1000) NULL,
    [Primary Market (Region)] [nvarchar](1000) NULL,
    [Realized Gains/Losses] [float] NULL,
    [Realized Proceeds] [float] NULL,
    [Reference Date] [date] NULL,
    [Return Of Cost] [float] NULL,
    [Revenue (LTM) Acquired] [float] NULL,
    [Sector] [varchar](1000) NULL,
    [Seller] [varchar](1000) NULL,
    [State] [varchar](1000) NULL,
    [Strategy] [varchar](1000) NULL,
    [Sub-Industry] [varchar](1000) NULL,
    [Ticker] [varchar](1000) NULL,
    [Total # Of Board Members] [int] NULL,
    [Total Invested (LP Co-Investors)] [float] NULL,
    [Total Investment Value] [float] NULL,
    [Transaction Co-Leads] [varchar](1000) NULL,
    [Transaction Costs] [float] NULL,
    [Transaction Lead] [nvarchar](1000) NULL,
    [TTM EBITDA 2] [float] NULL,
    [TTM EBITDA 3] [float] NULL,
    [TTM Revenue 2] [float] NULL,
    [TTM Revenue 3] [float] NULL,
    [Valuation Comments] [varchar](1000) NULL,
    [Valuation Method] [varchar](1000) NULL,
    [Value Driver Amount] [float] NULL,
    [Value Driver Date] [date] NULL,
    [Value Driver Type] [varchar](1000) NULL,
    [Vehicle Ownership At Exit] [float] NULL,
    [Vehicle Ownership] [float] NULL,
    [Website] [varchar](1000) NULL,
    [Asset Class] [varchar](1000) NULL
) ON [PRIMARY]
GO

一种解决方案是从UholdingsPEtable得到Reference_Date列的最小值和最大值之间的所有四分之一的所有组合以及所有不同的值InvestmentHolding 列。所有行都从 cih CTE 返回。之后,需要提取UholdingsPE行的Price列的值table最接近Reference_Date列的值cih CTE 当前行的 Reference_Date 列的值。

with
  cc as (
    select
      min(Reference_Date) as Reference_Date,
      max(Reference_Date) as xq
    from UholdingsPE
    union all
    select eomonth(dateadd(qq, 1, q)), xq
    from cc
    where q < xq
  ),
  ii as (select distinct Investment from UholdingsPE),
  hh as (select distinct Holding from UholdingsPE),
  cih as (
    select Reference_Date, Investment, Holding
    from cc
    cross join ii
    cross join hh
  )
select *
from cih
cross apply (
  select a.Price
  from UholdingsPE as a
  where
    a.Reference_Date <= cih.Reference_Date and
    a.Investment = cih.Investment and
    a.Holding = cih.Holding
  order by a.Reference_Date desc
    offset 0 rows
    fetch first 1 row only
) as ca
order by Reference_Date, Investment, Holding
option (maxrecursion 0);

输出:

+----+---------------------+----------------+-----------+-------+
|    |    Reference_Date   |   Investment   |  Holding  | Price |
+----+---------------------+----------------+-----------+-------+
|  1 | 31.03.2017 00:00:00 | Example Fund 1 | Apple     |     1 |
|  2 | 31.03.2017 00:00:00 | Example Fund 1 | Microsoft |     1 |
|  3 | 30.06.2017 00:00:00 | Example Fund 1 | Apple     |     1 |
|  4 | 30.06.2017 00:00:00 | Example Fund 1 | Microsoft |     1 |
|  5 | 30.09.2017 00:00:00 | Example Fund 1 | Apple     |     1 |
|  6 | 30.09.2017 00:00:00 | Example Fund 1 | Microsoft |     1 |
|  7 | 31.12.2017 00:00:00 | Example Fund 1 | Apple     |     1 |
|  8 | 31.12.2017 00:00:00 | Example Fund 1 | Microsoft |     1 |
|  9 | 31.03.2018 00:00:00 | Example Fund 1 | Apple     |     1 |
| 10 | 31.03.2018 00:00:00 | Example Fund 1 | Microsoft |     1 |
| 11 | 30.06.2018 00:00:00 | Example Fund 1 | Apple     |    22 |
| 12 | 30.06.2018 00:00:00 | Example Fund 1 | Microsoft |    22 |
| 13 | 30.09.2018 00:00:00 | Example Fund 1 | Apple     |    22 |
| 14 | 30.09.2018 00:00:00 | Example Fund 1 | Microsoft |    22 |
| 15 | 31.12.2018 00:00:00 | Example Fund 1 | Apple     |    33 |
| 16 | 31.12.2018 00:00:00 | Example Fund 1 | Microsoft |    33 |
| 17 | 31.03.2019 00:00:00 | Example Fund 1 | Apple     |    33 |
| 18 | 31.03.2019 00:00:00 | Example Fund 1 | Microsoft |    33 |
| 19 | 30.06.2019 00:00:00 | Example Fund 1 | Apple     |    44 |
| 20 | 30.06.2019 00:00:00 | Example Fund 1 | Microsoft |    44 |
| 21 | 30.09.2019 00:00:00 | Example Fund 1 | Apple     |    44 |
| 22 | 30.09.2019 00:00:00 | Example Fund 1 | Microsoft |    44 |
| 23 | 31.12.2019 00:00:00 | Example Fund 1 | Apple     |    55 |
| 24 | 31.12.2019 00:00:00 | Example Fund 1 | Microsoft |    55 |
+----+---------------------+----------------+-----------+-------+

Demo.

让我们试试基于游标的解决方案。

declare
  @prev_inv varchar(14), @Investment varchar(14),
  @prev_hold varchar(9), @Holding varchar(9),
  @prev_ref_date date, @Reference_date date,
  @prev_price integer, @Price integer, @qdiff integer;
declare
  c cursor forward_only static read_only for
    select Investment, Holding, Reference_Date, Price from UholdingsPE
    union all
    select Investment, Holding, ref_date, -1
    from (select max(Reference_Date) as ref_date from UholdingsPE) as a
    cross join (select distinct Investment from UholdingsPE) as b
    cross join (select distinct Holding from UholdingsPE) as c
    order by Investment, Holding, Reference_Date, Price desc;
open c;
fetch next from c into @Investment, @Holding, @Reference_Date, @Price;
select
  @prev_inv = @Investment,
  @prev_hold = @Holding,
  @prev_ref_date = @Reference_date,
  @prev_price = @Price;
while @@fetch_status = 0 begin
  fetch next from c into @Investment, @Holding, @Reference_Date, @Price;
  set @qdiff = datediff(q, @prev_ref_date, @Reference_date);
  if @prev_inv = @Investment and
     @prev_hold = @Holding and
     @qdiff > 1
  begin
    insert into miss_UholdingsPE(Investment,
                                 Holding,
                                 Price,
                                 Reference_Date)
      select
        @prev_inv, @prev_hold, @prev_price,
        eomonth(dateadd(q, nums.n, @prev_ref_date))
      from nums
      where nums.n < @qdiff + iif(@Price = -1, 1, 0);
  end;
  select
    @prev_inv = @Investment,
    @prev_hold = @Holding,
    @prev_ref_date = @Reference_date,
    @prev_price = @Price;
end;
close c;
deallocate c;

Cursor-based solution demo.

另一个基于集合的解决方案:

with
  a as (select distinct Investment, Holding from stage_UholdingsPE),
  b as (
    select Q.Reference_Date, a.Investment, a.Holding
    from a
    cross join (
      select eomonth(dateadd(quarter, n, '20160331'))
      from Nums(0, datediff(quarter, '20160331', '20191231'))
    ) as Q(Reference_Date)
    where not exists(select * from stage_UholdingsPE as s
                     where
                       s.Reference_Date = Q.Reference_Date and
                       s.Investment = a.Investment and
                       s.Holding = a.Holding)
  )
insert into stage_UholdingsPE(Reference_Date, Investment, Holding, Price)
  select Reference_Date, Investment, Holding, null from b;
go
with
  a as (
    select
      *,
      count(Price) over(partition by Investment, Holding
                        order by Reference_Date
                        rows unbounded preceding) as c
    from stage_UholdingsPE
  )
insert into full_UholdingsPE(Reference_Date, Investment, Holding, Price)
  select
    Reference_Date, Investment, Holding,
    max(Price) over(partition by c) as Price
  from a;

Demo.

请尝试以下操作:

DECLARE @MaxDate Date = '20191231'  --Typically this would be the max Reference_Date grouped by Fund as we cannot pass parameters in our BI tool

declare @quarter_end_date date = (select min(Reference_Date) from UholdingsPE)
declare @start_date date = @quarter_end_date

declare cur cursor fast_forward for select distinct Investment, Holding from UholdingsPE

declare @Investment varchar(100), @Holding varchar(10)
declare @latest_holdings int

open cur
fetch next from cur into @Investment, @Holding
while @@FETCH_STATUS = 0
begin
    while (@quarter_end_date <= convert(date, @MaxDate))
    begin
        if ((select count(1) from UholdingsPE where Reference_Date = @quarter_end_date and Investment = @Investment and Holding = @Holding) = 0)
        begin
            set @latest_holdings = (select top 1 Price from UholdingsPE where Reference_Date <= @quarter_end_date and Investment = @Investment and Holding = @Holding order by Reference_Date desc)
            insert into UholdingsPE select @quarter_end_date, @Investment, @Holding, @latest_holdings
        end
        set @quarter_end_date = eomonth(dateadd(mm, 3, @quarter_end_date))
    end
    set @quarter_end_date = eomonth(dateadd(mm, 3, @start_date))
    fetch next from cur into @Investment, @Holding
end
close cur
deallocate cur

select * from UholdingsPE

请找到数据库<>fiddle here.