在 Snowflake 查询中使用 window 函数进行条件分组
Conditional group by with window function in Snowflake query
我在 Snowflake 中有一个 table,格式如下:
create temp_test(name string, split string, value int)
insert into temp_test
values ('A','a', 100), ('A','b', 200), ('A','c',300), ('A', 'd', 400), ('A', 'e',500), ('B', 'a', 1000), ('B','b', 2000), ('B','c', 3000), ('B', 'd',4000), ('B','e', 5000)
第一步,我只需要 每个名称的前 2 个值(按值排序),所以我使用以下查询来获取:
select name, split, value,
row_number() over (PARTITION BY (name) order by value desc) as row_num
from temp_test
qualify row_num <= 2
这给了我以下结果集:
NAME SPLIT VALUE ROW_NUM
A e 500 1
A d 400 2
B e 5000 1
B d 4000 2
现在,我需要对 Top 2 以外的值求和并将其放入名为 "Others" 的不同拆分中,如下所示:
NAME SPLIT VALUE
A e 500
A d 400
A Others 600
B e 5000
B d 4000
B Others 6000
如何在 Snowflake 查询或 SQL 中执行此操作?
with data as (
select name, split, value,
row_number() over (partition by (name) order by value desc) as row_num
from temp_test
)
select
name,
case when row_num <= 2 then split else 'Others' end as split,
sum(value) as value
from data
group by name, case when row_num <= 2 then row_num else 3 end
Shawnt00的回答很好,但是为了记录在Snowflake中这个可以写得更简单:
首先末尾的group by可以通过index或者name来引用结果:
GROUP BY 1,2
或
GROUP BY name, split
此外,由于 CASE 只有太多分支,因此可以使用 IFF,并且您似乎正在使用 CTE 添加 row_number 您也可以将 IFF 推入 CTE
WITH data AS (
SELECT name, value,
ROW_NUMBER() OVER (PARTITION BY name ORDER BY value DESC) AS row_num,
IFF(row_num < 3, split, 'Others') as n_split
FROM VALUES ('A','a', 100), ('A','b', 200), ('A','c',300), ('A', 'd', 400),
('A', 'e',500), ('B', 'a', 1000), ('B','b', 2000), ('B','c', 3000),
('B', 'd',4000), ('B','e', 5000)
v(name, split, value)
)
SELECT
name,
n_split,
SUM(value) AS value
FROM data
GROUP BY name, n_split;
如果超级热衷于小 SQL 将 ROW_NUMBER 推入 IFF:
WITH data AS (
SELECT name, value,
IFF(ROW_NUMBER() OVER (PARTITION BY name ORDER BY value DESC) < 3, split, 'Others') as n_split
FROM VALUES ('A','a', 100), ('A','b', 200), ('A','c',300), ('A', 'd', 400),
('A', 'e',500), ('B', 'a', 1000), ('B','b', 2000), ('B','c', 3000),
('B', 'd',4000), ('B','e', 5000)
v(name, split, value)
)
SELECT
name,
n_split AS split,
SUM(value) AS value
FROM data
GROUP BY name, n_split;
给出:
NAME SPLIT VALUE
A e 500
A d 400
A Others 600
B e 5000
B d 4000
B Others 6000
我在 Snowflake 中有一个 table,格式如下:
create temp_test(name string, split string, value int)
insert into temp_test
values ('A','a', 100), ('A','b', 200), ('A','c',300), ('A', 'd', 400), ('A', 'e',500), ('B', 'a', 1000), ('B','b', 2000), ('B','c', 3000), ('B', 'd',4000), ('B','e', 5000)
第一步,我只需要 每个名称的前 2 个值(按值排序),所以我使用以下查询来获取:
select name, split, value,
row_number() over (PARTITION BY (name) order by value desc) as row_num
from temp_test
qualify row_num <= 2
这给了我以下结果集:
NAME SPLIT VALUE ROW_NUM
A e 500 1
A d 400 2
B e 5000 1
B d 4000 2
现在,我需要对 Top 2 以外的值求和并将其放入名为 "Others" 的不同拆分中,如下所示:
NAME SPLIT VALUE
A e 500
A d 400
A Others 600
B e 5000
B d 4000
B Others 6000
如何在 Snowflake 查询或 SQL 中执行此操作?
with data as (
select name, split, value,
row_number() over (partition by (name) order by value desc) as row_num
from temp_test
)
select
name,
case when row_num <= 2 then split else 'Others' end as split,
sum(value) as value
from data
group by name, case when row_num <= 2 then row_num else 3 end
Shawnt00的回答很好,但是为了记录在Snowflake中这个可以写得更简单:
首先末尾的group by可以通过index或者name来引用结果:
GROUP BY 1,2
或
GROUP BY name, split
此外,由于 CASE 只有太多分支,因此可以使用 IFF,并且您似乎正在使用 CTE 添加 row_number 您也可以将 IFF 推入 CTE
WITH data AS (
SELECT name, value,
ROW_NUMBER() OVER (PARTITION BY name ORDER BY value DESC) AS row_num,
IFF(row_num < 3, split, 'Others') as n_split
FROM VALUES ('A','a', 100), ('A','b', 200), ('A','c',300), ('A', 'd', 400),
('A', 'e',500), ('B', 'a', 1000), ('B','b', 2000), ('B','c', 3000),
('B', 'd',4000), ('B','e', 5000)
v(name, split, value)
)
SELECT
name,
n_split,
SUM(value) AS value
FROM data
GROUP BY name, n_split;
如果超级热衷于小 SQL 将 ROW_NUMBER 推入 IFF:
WITH data AS (
SELECT name, value,
IFF(ROW_NUMBER() OVER (PARTITION BY name ORDER BY value DESC) < 3, split, 'Others') as n_split
FROM VALUES ('A','a', 100), ('A','b', 200), ('A','c',300), ('A', 'd', 400),
('A', 'e',500), ('B', 'a', 1000), ('B','b', 2000), ('B','c', 3000),
('B', 'd',4000), ('B','e', 5000)
v(name, split, value)
)
SELECT
name,
n_split AS split,
SUM(value) AS value
FROM data
GROUP BY name, n_split;
给出:
NAME SPLIT VALUE
A e 500
A d 400
A Others 600
B e 5000
B d 4000
B Others 6000