某列的累计和
Cumulative sum of a column
我有一张包含以下数据的表。
COUNTRY LEVEL NUM_OF_DUPLICATES
US 9 6
US 8 24
US 7 12
US 6 20
US 5 39
US 4 81
US 3 80
US 2 130
US 1 178
US 0 254
我编写了一个查询来计算各行的累计和(POOL),并得到以下输出。
COUNTRY LEVEL NUM_OF_DUPLICATES POOL
US 9 6 6
US 8 24 30
US 7 12 42
US 6 20 62
US 5 39 101
US 4 81 182
US 3 80 262
US 2 130 392
US 1 178 570
US 0 254 824
现在我想过滤数据,只保留 POOL <= 300 的行;如果 POOL 列中没有恰好等于 300 的值,则还要包含 300 之后的第一个值。在上面的例子中,POOL 列中没有值 300,所以取 300 之后紧邻的下一个值 392。因此我需要一个查询来提取 POOL <= 392 的记录(按照上面的示例),输出结果应为:
COUNTRY LEVEL NUM_OF_DUPLICATES POOL
US 9 6 6
US 8 24 30
US 7 12 42
US 6 20 62
US 5 39 101
US 4 81 182
US 3 80 262
US 2 130 392
请告诉我您的想法。提前致谢。
-- Sample data from the question: one row per (Country, Level).
declare @t table(Country varchar(5), Level int, Num_of_Duplicates int);

insert into @t(Country, Level, Num_of_Duplicates)
values
('US', 9, 6),
('US', 8, 24),
('US', 7, 12),
('US', 6, 20),
('US', 5, 39),
('US', 4, 81),
('US', 3, 80),
('US', 2, 130), -- use 38 (= 130 - 92) instead to test a running total of exactly 300
('US', 1, 178),
('US', 0, 254);

-- Diagnostic view: the running total per country (highest Level first) plus two
-- equivalent markers for "this row starts before the running total reached 300".
-- ROWS UNBOUNDED PRECEDING is spelled out because the default RANGE frame gives
-- all rows with the same Level the same total, which breaks "Pool - Num_of_Duplicates".
select
    Country,
    Level,
    Num_of_Duplicates,
    sum(Num_of_Duplicates) over(partition by Country order by Level desc rows unbounded preceding) as Pool,
    -- integer division: any row which starts before 300 will have flag = 0
    (sum(Num_of_Duplicates) over(partition by Country order by Level desc rows unbounded preceding) - Num_of_Duplicates) / 300 as flag,
    -- or, as an explicit 1/0 marker
    case
        when sum(Num_of_Duplicates) over(partition by Country order by Level desc rows unbounded preceding) - Num_of_Duplicates < 300 then 1
        else 0
    end as startsbefore300
from @t
order by Country, Level desc;

-- Final answer: keep every row whose running total *before* the row is below 300,
-- i.e. all rows up to and including the first one that reaches or crosses 300.
select
    Country,
    Level,
    Num_of_Duplicates,
    Pool
from
(
    select
        Country,
        Level,
        Num_of_Duplicates,
        sum(Num_of_Duplicates) over(partition by Country order by Level desc rows unbounded preceding) as Pool
    from @t
) as t
where Pool - Num_of_Duplicates < 300
order by Country, Level desc;
这里的逻辑很简单:
- 计算截至当前行的累计和,即 POOL 值。
- 过滤行,只保留"上一行的累计和小于 300"的行:上一行的累计和可以用当前行的累计和减去当前行的值得到,也可以用第二个窗口求和单独计算
- 如果截至当前行的累计和恰好是 300,则上一行的累计和一定小于 300,所以这一行会被包含
- 如果当前行的累计和超过 300,但上一行的累计和小于 300,那么这一行也会被包含
- 此后的所有行都会被排除
It's unclear what ordering you want. I've used the NUM_OF_DUPLICATES
column ascending, but you may want something else.
-- Keep every row whose running total *before* the row is still below 300, so the
-- first row that reaches or crosses 300 is the last one returned.
-- The running total is accumulated in NUM_OF_DUPLICATES ascending order; COUNTRY and
-- LEVEL are added only as tie-breakers so equal values are summed in a repeatable order
-- (assumes (COUNTRY, LEVEL) identifies a row -- TODO confirm against the real table).
SELECT
    COUNTRY,
    LEVEL,
    NUM_OF_DUPLICATES,
    POOL
FROM (
    SELECT
        COUNTRY,
        LEVEL,
        NUM_OF_DUPLICATES,
        POOL = SUM(NUM_OF_DUPLICATES) OVER (ORDER BY NUM_OF_DUPLICATES, COUNTRY, LEVEL ROWS UNBOUNDED PRECEDING)
        -- Alternative: compute the previous running total directly instead of subtracting.
        -- Its frame is empty on the first row, so POOLPrev is NULL there and the filter
        -- must be written as ISNULL(POOLPrev, 0) < 300.
        -- ,POOLPrev = SUM(NUM_OF_DUPLICATES) OVER (ORDER BY NUM_OF_DUPLICATES, COUNTRY, LEVEL ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING)
    FROM YourTable
) t
WHERE POOL - NUM_OF_DUPLICATES < 300;
我用了两个临时表来得到答案。
-- Sample data from the question: one row per (Country, Level).
DECLARE @t TABLE(Country VARCHAR(5), [Level] INT, Num_of_Duplicates INT);

INSERT INTO @t(Country, [Level], Num_of_Duplicates)
VALUES ('US', 9, 6),
('US', 8, 24),
('US', 7, 12),
('US', 6, 20),
('US', 5, 39),
('US', 4, 81),
('US', 3, 80),
('US', 2, 130),
('US', 1, 178),
('US', 0, 254);

-- Step 1: running total (POOL) per country, accumulated from the highest Level down.
-- The row number is ordered by Level; ordering it by Country alone would leave the
-- accumulation order arbitrary because every row of a country ties on that key.
SELECT
    Country
    ,[Level]
    ,Num_of_Duplicates
    ,SUM(Num_of_Duplicates) OVER (PARTITION BY Country ORDER BY id ROWS UNBOUNDED PRECEDING) AS [POOL]
INTO #temp_table
FROM
(
    SELECT
        Country,
        [Level],
        Num_of_Duplicates,
        ROW_NUMBER() OVER (PARTITION BY Country ORDER BY [Level] DESC) AS id
    FROM @t
) AS A;

-- Step 2: rank the running totals that have reached 300, smallest first, per country;
-- rank 1 is the cut-off value (exactly 300, or the first value after it).
SELECT
    Country,
    [POOL],
    ROW_NUMBER() OVER (PARTITION BY Country ORDER BY [POOL]) AS [rank]
INTO #Temp_2
FROM #temp_table
WHERE [POOL] >= 300;

-- Step 3: return every row up to and including the cut-off row.
SELECT
    p.Country,
    p.[Level],
    p.Num_of_Duplicates,
    p.[POOL]
FROM #temp_table AS p
LEFT JOIN #Temp_2 AS c
    ON c.Country = p.Country
    AND c.[rank] = 1
WHERE c.[POOL] IS NULL          -- the country never reaches 300: keep all of its rows
    OR p.[POOL] <= c.[POOL]
ORDER BY p.Country, p.[Level] DESC;

DROP TABLE #temp_table;
DROP TABLE #Temp_2;
我有一张包含以下数据的表。
COUNTRY LEVEL NUM_OF_DUPLICATES
US 9 6
US 8 24
US 7 12
US 6 20
US 5 39
US 4 81
US 3 80
US 2 130
US 1 178
US 0 254
我编写了一个查询来计算各行的累计和(POOL),并得到以下输出。
COUNTRY LEVEL NUM_OF_DUPLICATES POOL
US 9 6 6
US 8 24 30
US 7 12 42
US 6 20 62
US 5 39 101
US 4 81 182
US 3 80 262
US 2 130 392
US 1 178 570
US 0 254 824
现在我想过滤数据,只保留 POOL <= 300 的行;如果 POOL 列中没有恰好等于 300 的值,则还要包含 300 之后的第一个值。在上面的例子中,POOL 列中没有值 300,所以取 300 之后紧邻的下一个值 392。因此我需要一个查询来提取 POOL <= 392 的记录(按照上面的示例),输出结果应为:
COUNTRY LEVEL NUM_OF_DUPLICATES POOL
US 9 6 6
US 8 24 30
US 7 12 42
US 6 20 62
US 5 39 101
US 4 81 182
US 3 80 262
US 2 130 392
请告诉我您的想法。提前致谢。
-- Sample data from the question: one row per (Country, Level).
declare @t table(Country varchar(5), Level int, Num_of_Duplicates int);

insert into @t(Country, Level, Num_of_Duplicates)
values
('US', 9, 6),
('US', 8, 24),
('US', 7, 12),
('US', 6, 20),
('US', 5, 39),
('US', 4, 81),
('US', 3, 80),
('US', 2, 130), -- use 38 (= 130 - 92) instead to test a running total of exactly 300
('US', 1, 178),
('US', 0, 254);

-- Diagnostic view: the running total per country (highest Level first) plus two
-- equivalent markers for "this row starts before the running total reached 300".
-- ROWS UNBOUNDED PRECEDING is spelled out because the default RANGE frame gives
-- all rows with the same Level the same total, which breaks "Pool - Num_of_Duplicates".
select
    Country,
    Level,
    Num_of_Duplicates,
    sum(Num_of_Duplicates) over(partition by Country order by Level desc rows unbounded preceding) as Pool,
    -- integer division: any row which starts before 300 will have flag = 0
    (sum(Num_of_Duplicates) over(partition by Country order by Level desc rows unbounded preceding) - Num_of_Duplicates) / 300 as flag,
    -- or, as an explicit 1/0 marker
    case
        when sum(Num_of_Duplicates) over(partition by Country order by Level desc rows unbounded preceding) - Num_of_Duplicates < 300 then 1
        else 0
    end as startsbefore300
from @t
order by Country, Level desc;

-- Final answer: keep every row whose running total *before* the row is below 300,
-- i.e. all rows up to and including the first one that reaches or crosses 300.
select
    Country,
    Level,
    Num_of_Duplicates,
    Pool
from
(
    select
        Country,
        Level,
        Num_of_Duplicates,
        sum(Num_of_Duplicates) over(partition by Country order by Level desc rows unbounded preceding) as Pool
    from @t
) as t
where Pool - Num_of_Duplicates < 300
order by Country, Level desc;
这里的逻辑很简单:
- 计算截至当前行的累计和,即 POOL 值。
- 过滤行,只保留"上一行的累计和小于 300"的行:上一行的累计和可以用当前行的累计和减去当前行的值得到,也可以用第二个窗口求和单独计算
- 如果截至当前行的累计和恰好是 300,则上一行的累计和一定小于 300,所以这一行会被包含
- 如果当前行的累计和超过 300,但上一行的累计和小于 300,那么这一行也会被包含
- 此后的所有行都会被排除
It's unclear what ordering you want. I've used the NUM_OF_DUPLICATES
column ascending, but you may want something else.
-- Keep every row whose running total *before* the row is still below 300, so the
-- first row that reaches or crosses 300 is the last one returned.
-- The running total is accumulated in NUM_OF_DUPLICATES ascending order; COUNTRY and
-- LEVEL are added only as tie-breakers so equal values are summed in a repeatable order
-- (assumes (COUNTRY, LEVEL) identifies a row -- TODO confirm against the real table).
SELECT
    COUNTRY,
    LEVEL,
    NUM_OF_DUPLICATES,
    POOL
FROM (
    SELECT
        COUNTRY,
        LEVEL,
        NUM_OF_DUPLICATES,
        POOL = SUM(NUM_OF_DUPLICATES) OVER (ORDER BY NUM_OF_DUPLICATES, COUNTRY, LEVEL ROWS UNBOUNDED PRECEDING)
        -- Alternative: compute the previous running total directly instead of subtracting.
        -- Its frame is empty on the first row, so POOLPrev is NULL there and the filter
        -- must be written as ISNULL(POOLPrev, 0) < 300.
        -- ,POOLPrev = SUM(NUM_OF_DUPLICATES) OVER (ORDER BY NUM_OF_DUPLICATES, COUNTRY, LEVEL ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING)
    FROM YourTable
) t
WHERE POOL - NUM_OF_DUPLICATES < 300;
我用了两个临时表来得到答案。
-- Sample data from the question: one row per (Country, Level).
DECLARE @t TABLE(Country VARCHAR(5), [Level] INT, Num_of_Duplicates INT);

INSERT INTO @t(Country, [Level], Num_of_Duplicates)
VALUES ('US', 9, 6),
('US', 8, 24),
('US', 7, 12),
('US', 6, 20),
('US', 5, 39),
('US', 4, 81),
('US', 3, 80),
('US', 2, 130),
('US', 1, 178),
('US', 0, 254);

-- Step 1: running total (POOL) per country, accumulated from the highest Level down.
-- The row number is ordered by Level; ordering it by Country alone would leave the
-- accumulation order arbitrary because every row of a country ties on that key.
SELECT
    Country
    ,[Level]
    ,Num_of_Duplicates
    ,SUM(Num_of_Duplicates) OVER (PARTITION BY Country ORDER BY id ROWS UNBOUNDED PRECEDING) AS [POOL]
INTO #temp_table
FROM
(
    SELECT
        Country,
        [Level],
        Num_of_Duplicates,
        ROW_NUMBER() OVER (PARTITION BY Country ORDER BY [Level] DESC) AS id
    FROM @t
) AS A;

-- Step 2: rank the running totals that have reached 300, smallest first, per country;
-- rank 1 is the cut-off value (exactly 300, or the first value after it).
SELECT
    Country,
    [POOL],
    ROW_NUMBER() OVER (PARTITION BY Country ORDER BY [POOL]) AS [rank]
INTO #Temp_2
FROM #temp_table
WHERE [POOL] >= 300;

-- Step 3: return every row up to and including the cut-off row.
SELECT
    p.Country,
    p.[Level],
    p.Num_of_Duplicates,
    p.[POOL]
FROM #temp_table AS p
LEFT JOIN #Temp_2 AS c
    ON c.Country = p.Country
    AND c.[rank] = 1
WHERE c.[POOL] IS NULL          -- the country never reaches 300: keep all of its rows
    OR p.[POOL] <= c.[POOL]
ORDER BY p.Country, p.[Level] DESC;

DROP TABLE #temp_table;
DROP TABLE #Temp_2;