某列的累计和

Cumulative sum of a column

我有一张表,其中包含以下数据。

COUNTRY LEVEL   NUM_OF_DUPLICATES
  US    9           6
  US    8           24
  US    7           12
  US    6           20
  US    5           39
  US    4           81
  US    3           80
  US    2           130
  US    1           178
  US    0           254

我编写了一个查询,按 LEVEL 从高到低计算 NUM_OF_DUPLICATES 的累计和(POOL 列),得到以下输出。

COUNTRY LEVEL   NUM_OF_DUPLICATES      POOL
  US    9           6                   6
  US    8           24                  30
  US    7           12                  42
  US    6           20                  62
  US    5           39                  101
  US    4           81                  182
  US    3           80                  262
  US    2           130                 392
  US    1           178                 570
  US    0           254                 824

现在我想过滤数据,只保留 POOL <= 300 的行;如果 POOL 列中没有恰好等于 300 的值,则还应包含 POOL 超过 300 之后的第一行。在上面的例子中,POOL 列中没有 300 这个值,所以取 300 之后紧接着的下一个值 392。因此我需要一个查询来提取 POOL <= 392 的记录(按上面的示例),期望的输出为:

COUNTRY LEVEL   NUM_OF_DUPLICATES      POOL
  US    9           6                   6
  US    8           24                  30
  US    7           12                  42
  US    6           20                  62
  US    5           39                  101
  US    4           81                  182
  US    3           80                  262
  US    2           130                 392

请告诉我您的想法。提前致谢。

-- Sample data from the question: one row per (Country, Level).
-- The values reproduce the question's expected running totals
-- (6, 30, 42, 62, 101, 182, 262, 392, 570, 824).
declare @t table(Country varchar(5), Level int, Num_of_Duplicates int);

insert into @t(Country, Level, Num_of_Duplicates)
values
('US', 9, 6),
('US', 8, 24),
('US', 7, 12),
('US', 6, 20),
('US', 5, 39),
('US', 4, 81),
('US', 3, 80),
('US', 2, 130), -- use 130 - 92 = 38 instead to test a running total that lands exactly on 300
('US', 1, 178),
('US', 0, 254); -- 254 (not 430) is what the expected output implies: 824 - 570


-- Diagnostic query: lists every row of @t with its running total (Pool) and two
-- equivalent indicators of whether the row "starts" before the 300 threshold,
-- i.e. whether the running total of the rows before it is still below 300.
--   flag            = 0 when the row starts before 300 (integer division by 300)
--   startsbefore300 = 1 when the row starts before 300 (note the opposite polarity)
-- The frame is spelled out as ROWS UNBOUNDED PRECEDING: the default RANGE frame
-- would add all rows sharing the same Level to each other's total.
-- Assumes (Country, Level) is unique; if it is not, add a tiebreaker to the ORDER BY.
select
    Country,
    Level,
    Num_of_Duplicates,
    Pool,
    (Pool - Num_of_Duplicates) / 300 as flag,
    --or
    case when Pool - Num_of_Duplicates < 300 then 1 else 0 end as startsbefore300
from
   (
    select
        Country,
        Level,
        Num_of_Duplicates,
        sum(Num_of_Duplicates) over(partition by Country order by Level desc rows unbounded preceding) as Pool
    from @t
) as running;

-- Returns, per country, the rows up to and including the first one whose running
-- total (Pool) reaches or exceeds 300.
-- Pool - Num_of_Duplicates is the running total of the rows before the current one;
-- a row is kept while that preceding total is still below 300, so the row that
-- crosses (or lands exactly on) 300 is included and all later rows are dropped.
-- ROWS UNBOUNDED PRECEDING is explicit because the default RANGE frame would lump
-- rows with equal Level together. Assumes (Country, Level) is unique.
select
    Country,
    Level,
    Num_of_Duplicates,
    Pool
from
   (
    select
        Country,
        Level,
        Num_of_Duplicates,
        sum(Num_of_Duplicates) over(partition by Country order by Level desc rows unbounded preceding) as Pool
    from @t
) as t
where Pool - Num_of_Duplicates < 300
order by Country, Level desc;

这里的逻辑很简单:

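  • 计算截至当前行的累计和(running sum),即 POOL 值。
  • 只保留"上一行的累计和小于 300"的行:可以用当前行的 POOL 减去当前行的值得到上一行的累计和,也可以另外计算一个只累计到上一行的总和。
  • 如果截至当前行的累计和恰好是 300,那么上一行的累计和必然小于 300,所以这一行会被包含。
  • 如果截至当前行的累计和超过 300,但上一行的累计和仍小于 300,那么这一行也会被包含。
  • 此后的所有行(上一行的累计和已达到或超过 300)都会被排除。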

It's unclear what ordering you want. I've used NUM_OF_DUPLICATES column ascending, but you may want something else

-- Returns, per country, the rows up to and including the first one whose running
-- total (POOL) reaches or exceeds 300.
-- POOL - NUM_OF_DUPLICATES is the running total of the rows before the current one,
-- so a row is kept while that preceding total is still below 300.
-- The running total is ordered by NUM_OF_DUPLICATES ascending; LEVEL DESC is only a
-- tiebreaker so that equal NUM_OF_DUPLICATES values accumulate in a repeatable order
-- (assumes (COUNTRY, LEVEL) is unique -- confirm against the real table).
-- PARTITION BY COUNTRY keeps each country's total separate.
SELECT
    COUNTRY,
    LEVEL,
    NUM_OF_DUPLICATES,
    POOL
FROM (
    SELECT
        COUNTRY,
        LEVEL,
        NUM_OF_DUPLICATES,
        SUM(NUM_OF_DUPLICATES) OVER (
            PARTITION BY COUNTRY
            ORDER BY NUM_OF_DUPLICATES, LEVEL DESC
            ROWS UNBOUNDED PRECEDING
        ) AS POOL
        -- Alternative: compute the preceding total directly. The frame needs BETWEEN,
        -- and it is empty (so the sum is NULL) on the first row of each country.
        -- , SUM(NUM_OF_DUPLICATES) OVER (
        --       PARTITION BY COUNTRY
        --       ORDER BY NUM_OF_DUPLICATES, LEVEL DESC
        --       ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        --   ) AS POOLPrev
    FROM YourTable
) t
WHERE POOL - NUM_OF_DUPLICATES < 300
ORDER BY COUNTRY, NUM_OF_DUPLICATES, LEVEL DESC;
-- With POOLPrev the filter becomes: WHERE COALESCE(POOLPrev, 0) < 300

我用了两个临时表来得到答案。

 -- Sample data from the question: one row per (Country, Level).
DECLARE @t TABLE (Country VARCHAR(5), [Level] INT, Num_of_Duplicates INT);

INSERT INTO @t (Country, [Level], Num_of_Duplicates)
VALUES
    ('US', 9, 6),
    ('US', 8, 24),
    ('US', 7, 12),
    ('US', 6, 20),
    ('US', 5, 39),
    ('US', 4, 81),
    ('US', 3, 80),
    ('US', 2, 130),
    ('US', 1, 178),
    ('US', 0, 254);

-- Make the script rerunnable in the same session: leftovers from a failed
-- earlier run would otherwise make SELECT ... INTO fail.
IF OBJECT_ID('tempdb..#temp_table') IS NOT NULL DROP TABLE #temp_table;
IF OBJECT_ID('tempdb..#Temp_2') IS NOT NULL DROP TABLE #Temp_2;

-- Step 1: running total (POOL) per country, highest Level first.
-- The order comes from [Level] itself, not from a row number over Country: Country is
-- the same for every row of a country, so it does not define any row order.
-- ROWS UNBOUNDED PRECEDING avoids the default RANGE frame, which merges tied rows.
SELECT
    Country,
    [Level],
    Num_of_Duplicates,
    SUM(Num_of_Duplicates) OVER (
        PARTITION BY Country
        ORDER BY [Level] DESC
        ROWS UNBOUNDED PRECEDING
    ) AS [POOL]
INTO #temp_table
FROM @t;

-- Step 2: per country, rank the running totals that have reached the threshold;
-- rank 1 is the smallest POOL that is >= 300, i.e. the cut-off value.
SELECT
    Country,
    [POOL],
    ROW_NUMBER() OVER (PARTITION BY Country ORDER BY [POOL]) AS [rank]
INTO #Temp_2
FROM #temp_table
WHERE [POOL] >= 300;

-- Step 3: keep every row up to and including the cut-off.
-- The LEFT JOIN keeps countries whose total never reaches 300: they have no
-- cut-off row, and all of their rows are returned.
SELECT
    r.Country,
    r.[Level],
    r.Num_of_Duplicates,
    r.[POOL]
FROM #temp_table AS r
LEFT JOIN #Temp_2 AS c
    ON c.Country = r.Country
    AND c.[rank] = 1
WHERE c.[POOL] IS NULL
   OR r.[POOL] <= c.[POOL]
ORDER BY r.Country, r.[Level] DESC;

DROP TABLE #temp_table;
DROP TABLE #Temp_2;