在 Snowflake 中使用 Count Distinct 和 Pivot
Using Count Distinct with Pivot in Snowflake
我正在尝试对列 Join_Mon
进行透视(pivot),并获取每个 ID
的去重计数,如以下查询所示:
-- Question's original attempt: pivot CTE3 on Join_Mon, counting distinct
-- platform_payer_name values for each listed month.
-- As posted, this statement does not compile (see the error quoted after the query).
select *
from CTE3
-- The DISTINCT keyword inside the PIVOT aggregate is the token the compiler
-- reports as unexpected.
pivot(COUNT(DISTINCT platform_payer_name) for Join_Mon in (
'2021-03-01',
'2021-02-01',
'2021-01-01',
'2020-12-01'
))
as p
order by ID
-- NOTE(review): this closing parenthesis has no visible opening partner;
-- presumably it belongs to an enclosing query that is not shown here.
)
如您所见,我正试图对 platform_payer_name
列进行去重计数(COUNT DISTINCT)。但它给出了以下错误:
SQL compilation error: syntax error line 48 at position 16 unexpected 'DISTINCT'
我非常确定 DISTINCT
在 Snowflake 中可以与 COUNT
一起使用。有人能帮我解释一下它为什么在这里失败吗?感谢帮助。
先构造一些与你的数据结构相对应的假数据(我省略了多余的列):
-- Sample rows shaped like the question's data, pivoted on join_mon with a
-- plain (non-distinct) COUNT of platform_payer_name per listed month.
WITH cte3 (id, platform_payer_name, join_mon) AS (
    SELECT *
    FROM VALUES
        (1, 'aa', '2021-03-01'),
        (1, 'aa', '2021-03-01'),
        (1, 'aa', '2021-03-01'),
        (1, 'aa', '2021-02-01'),
        (2, 'bb', '2012-03-01'),
        (2, 'cc', '2020-12-01')
)
SELECT *
FROM cte3 AS c
    PIVOT (
        COUNT(c.platform_payer_name)
        FOR c.join_mon IN (
            '2021-03-01',
            '2021-02-01',
            '2021-01-01',
            '2020-12-01'
        )
    ) AS p
ORDER BY id;
给出:
ID '2021-03-01' '2021-02-01' '2021-01-01' '2020-12-01'
1 3 1 0 0
2 0 0 0 1
所以你想使用 DISTINCT
是合理的,
但 PIVOT 似乎不支持它。
下面的写法需要大量复制粘贴、容易出错,但它确实“有效”:
-- Manual pivot: one COUNT(DISTINCT ...) per month. IFF yields the payer name
-- only for rows of that month (NULL otherwise), and COUNT ignores the NULLs.
WITH cte3 (id, platform_payer_name, join_mon) AS (
    SELECT *
    FROM VALUES
        (1, 'aa', '2021-03-01'),
        (1, 'aa', '2021-03-01'),
        (1, 'aa', '2021-03-01'),
        (1, 'aa', '2021-02-01'),
        (2, 'bb', '2012-03-01'),
        (2, 'cc', '2020-12-01')
)
SELECT
    c.id,
    COUNT(DISTINCT IFF(c.join_mon = '2021-03-01', c.platform_payer_name, NULL)) AS "2021-03-01",
    COUNT(DISTINCT IFF(c.join_mon = '2021-02-01', c.platform_payer_name, NULL)) AS "2021-02-01",
    COUNT(DISTINCT IFF(c.join_mon = '2021-01-01', c.platform_payer_name, NULL)) AS "2021-01-01",
    COUNT(DISTINCT IFF(c.join_mon = '2020-12-01', c.platform_payer_name, NULL)) AS "2020-12-01"
FROM cte3 AS c
GROUP BY c.id
ORDER BY c.id;
给出:
ID 2021-03-01 2021-02-01 2021-01-01 2020-12-01
1 1 1 0 0
2 0 0 0 1
之所以有效,是因为 PIVOT 实际上执行了两项任务:第一项是当行的值与 IN 列表中的某个值匹配时,把值移动到对应的列中,这一步等价于下面的查询:
-- First half of a pivot, done by hand: copy platform_payer_name into the
-- column of its matching month and leave NULL in every other month column.
WITH cte3 (id, platform_payer_name, join_mon) AS (
    SELECT *
    FROM VALUES
        (1, 'aa', '2021-03-01'),
        (1, 'aa', '2021-03-01'),
        (1, 'aa', '2021-03-01'),
        (1, 'aa', '2021-02-01'),
        (2, 'bb', '2012-03-01'),
        (2, 'cc', '2020-12-01')
)
SELECT
    c.id,
    IFF(c.join_mon = '2021-03-01', c.platform_payer_name, NULL) AS "2021-03-01",
    IFF(c.join_mon = '2021-02-01', c.platform_payer_name, NULL) AS "2021-02-01",
    IFF(c.join_mon = '2021-01-01', c.platform_payer_name, NULL) AS "2021-01-01",
    IFF(c.join_mon = '2020-12-01', c.platform_payer_name, NULL) AS "2020-12-01"
FROM cte3 AS c
ORDER BY c.id;
给出:
ID, 2021-03-01, 2021-02-01, 2021-01-01, 2020-12-01
1, aa, NULL, NULL, NULL
1, aa, NULL, NULL, NULL
1, aa, NULL, NULL, NULL
1, NULL, aa, NULL, NULL
2, NULL, NULL, NULL, NULL
2, NULL, NULL, NULL, cc
第二项任务是聚合:然后就可以对每一列运行 count(distinct x),
如下所示(沿用上面定义的 cte3)。
-- Second half of a pivot, done by hand: aggregate each month column produced
-- by the inner projection. Relies on the cte3 sample CTE from the earlier
-- examples being in scope.
SELECT
    spread.id,
    COUNT(DISTINCT spread."2021-03-01") AS "2021-03-01",
    COUNT(DISTINCT spread."2021-02-01") AS "2021-02-01",
    COUNT(DISTINCT spread."2021-01-01") AS "2021-01-01",
    COUNT(DISTINCT spread."2020-12-01") AS "2020-12-01"
FROM (
    SELECT
        c.id,
        IFF(c.join_mon = '2021-03-01', c.platform_payer_name, NULL) AS "2021-03-01",
        IFF(c.join_mon = '2021-02-01', c.platform_payer_name, NULL) AS "2021-02-01",
        IFF(c.join_mon = '2021-01-01', c.platform_payer_name, NULL) AS "2021-01-01",
        IFF(c.join_mon = '2020-12-01', c.platform_payer_name, NULL) AS "2020-12-01"
    FROM cte3 AS c
) AS spread
GROUP BY spread.id
ORDER BY spread.id;
或者也可以像我前面展示的那样,以内联(inline)方式在一个查询中完成。
另一种做法:Snowflake 支持 COUNT_IF:
-- Distinct payer count per id and month via COUNT_IF.
-- The inner SELECT DISTINCT collapses duplicate (id, payer, month) rows, so
-- each surviving row is one distinct payer for that id and month; COUNT_IF
-- then tallies those rows per month column.
-- Uses the question's relation (cte3) and column (platform_payer_name), and
-- covers all four months listed in the question.
SELECT
    s.id,
    -- The IS NOT NULL guard keeps the result aligned with COUNT(DISTINCT ...),
    -- which ignores NULL payer names, without dropping ids that only have NULLs.
    COUNT_IF(s.join_mon = '2021-03-01' AND s.platform_payer_name IS NOT NULL) AS "2021-03-01",
    COUNT_IF(s.join_mon = '2021-02-01' AND s.platform_payer_name IS NOT NULL) AS "2021-02-01",
    COUNT_IF(s.join_mon = '2021-01-01' AND s.platform_payer_name IS NOT NULL) AS "2021-01-01",
    COUNT_IF(s.join_mon = '2020-12-01' AND s.platform_payer_name IS NOT NULL) AS "2020-12-01"
FROM (
    SELECT DISTINCT
        id,
        platform_payer_name,
        join_mon
    FROM cte3
) AS s
GROUP BY s.id
ORDER BY s.id;
我正在尝试对列 Join_Mon
进行透视(pivot),并获取每个 ID
的去重计数,如以下查询所示:
-- Question's original attempt: pivot CTE3 on Join_Mon, counting distinct
-- platform_payer_name values for each listed month.
-- As posted, this statement does not compile (see the error quoted after the query).
select *
from CTE3
-- The DISTINCT keyword inside the PIVOT aggregate is the token the compiler
-- reports as unexpected.
pivot(COUNT(DISTINCT platform_payer_name) for Join_Mon in (
'2021-03-01',
'2021-02-01',
'2021-01-01',
'2020-12-01'
))
as p
order by ID
-- NOTE(review): this closing parenthesis has no visible opening partner;
-- presumably it belongs to an enclosing query that is not shown here.
)
如您所见,我正试图对 platform_payer_name
列进行去重计数(COUNT DISTINCT)。但它给出了以下错误:
SQL compilation error: syntax error line 48 at position 16 unexpected 'DISTINCT'
我非常确定 DISTINCT
在 Snowflake 中可以与 COUNT
一起使用。有人能帮我解释一下它为什么在这里失败吗?感谢帮助。
先构造一些与你的数据结构相对应的假数据(我省略了多余的列):
-- Sample rows shaped like the question's data, pivoted on join_mon with a
-- plain (non-distinct) COUNT of platform_payer_name per listed month.
WITH cte3 (id, platform_payer_name, join_mon) AS (
    SELECT *
    FROM VALUES
        (1, 'aa', '2021-03-01'),
        (1, 'aa', '2021-03-01'),
        (1, 'aa', '2021-03-01'),
        (1, 'aa', '2021-02-01'),
        (2, 'bb', '2012-03-01'),
        (2, 'cc', '2020-12-01')
)
SELECT *
FROM cte3 AS c
    PIVOT (
        COUNT(c.platform_payer_name)
        FOR c.join_mon IN (
            '2021-03-01',
            '2021-02-01',
            '2021-01-01',
            '2020-12-01'
        )
    ) AS p
ORDER BY id;
给出:
ID '2021-03-01' '2021-02-01' '2021-01-01' '2020-12-01'
1 3 1 0 0
2 0 0 0 1
所以你想使用 DISTINCT
是合理的,
但 PIVOT 似乎不支持它。
下面的写法需要大量复制粘贴、容易出错,但它确实“有效”:
-- Manual pivot: one COUNT(DISTINCT ...) per month. IFF yields the payer name
-- only for rows of that month (NULL otherwise), and COUNT ignores the NULLs.
WITH cte3 (id, platform_payer_name, join_mon) AS (
    SELECT *
    FROM VALUES
        (1, 'aa', '2021-03-01'),
        (1, 'aa', '2021-03-01'),
        (1, 'aa', '2021-03-01'),
        (1, 'aa', '2021-02-01'),
        (2, 'bb', '2012-03-01'),
        (2, 'cc', '2020-12-01')
)
SELECT
    c.id,
    COUNT(DISTINCT IFF(c.join_mon = '2021-03-01', c.platform_payer_name, NULL)) AS "2021-03-01",
    COUNT(DISTINCT IFF(c.join_mon = '2021-02-01', c.platform_payer_name, NULL)) AS "2021-02-01",
    COUNT(DISTINCT IFF(c.join_mon = '2021-01-01', c.platform_payer_name, NULL)) AS "2021-01-01",
    COUNT(DISTINCT IFF(c.join_mon = '2020-12-01', c.platform_payer_name, NULL)) AS "2020-12-01"
FROM cte3 AS c
GROUP BY c.id
ORDER BY c.id;
给出:
ID 2021-03-01 2021-02-01 2021-01-01 2020-12-01
1 1 1 0 0
2 0 0 0 1
之所以有效,是因为 PIVOT 实际上执行了两项任务:第一项是当行的值与 IN 列表中的某个值匹配时,把值移动到对应的列中,这一步等价于下面的查询:
-- First half of a pivot, done by hand: copy platform_payer_name into the
-- column of its matching month and leave NULL in every other month column.
WITH cte3 (id, platform_payer_name, join_mon) AS (
    SELECT *
    FROM VALUES
        (1, 'aa', '2021-03-01'),
        (1, 'aa', '2021-03-01'),
        (1, 'aa', '2021-03-01'),
        (1, 'aa', '2021-02-01'),
        (2, 'bb', '2012-03-01'),
        (2, 'cc', '2020-12-01')
)
SELECT
    c.id,
    IFF(c.join_mon = '2021-03-01', c.platform_payer_name, NULL) AS "2021-03-01",
    IFF(c.join_mon = '2021-02-01', c.platform_payer_name, NULL) AS "2021-02-01",
    IFF(c.join_mon = '2021-01-01', c.platform_payer_name, NULL) AS "2021-01-01",
    IFF(c.join_mon = '2020-12-01', c.platform_payer_name, NULL) AS "2020-12-01"
FROM cte3 AS c
ORDER BY c.id;
给出:
ID, 2021-03-01, 2021-02-01, 2021-01-01, 2020-12-01
1, aa, NULL, NULL, NULL
1, aa, NULL, NULL, NULL
1, aa, NULL, NULL, NULL
1, NULL, aa, NULL, NULL
2, NULL, NULL, NULL, NULL
2, NULL, NULL, NULL, cc
第二项任务是聚合:然后就可以对每一列运行 count(distinct x),
如下所示(沿用上面定义的 cte3)。
-- Second half of a pivot, done by hand: aggregate each month column produced
-- by the inner projection. Relies on the cte3 sample CTE from the earlier
-- examples being in scope.
SELECT
    spread.id,
    COUNT(DISTINCT spread."2021-03-01") AS "2021-03-01",
    COUNT(DISTINCT spread."2021-02-01") AS "2021-02-01",
    COUNT(DISTINCT spread."2021-01-01") AS "2021-01-01",
    COUNT(DISTINCT spread."2020-12-01") AS "2020-12-01"
FROM (
    SELECT
        c.id,
        IFF(c.join_mon = '2021-03-01', c.platform_payer_name, NULL) AS "2021-03-01",
        IFF(c.join_mon = '2021-02-01', c.platform_payer_name, NULL) AS "2021-02-01",
        IFF(c.join_mon = '2021-01-01', c.platform_payer_name, NULL) AS "2021-01-01",
        IFF(c.join_mon = '2020-12-01', c.platform_payer_name, NULL) AS "2020-12-01"
    FROM cte3 AS c
) AS spread
GROUP BY spread.id
ORDER BY spread.id;
或者也可以像我前面展示的那样,以内联(inline)方式在一个查询中完成。
另一种做法:Snowflake 支持 COUNT_IF:
-- Distinct payer count per id and month via COUNT_IF.
-- The inner SELECT DISTINCT collapses duplicate (id, payer, month) rows, so
-- each surviving row is one distinct payer for that id and month; COUNT_IF
-- then tallies those rows per month column.
-- Uses the question's relation (cte3) and column (platform_payer_name), and
-- covers all four months listed in the question.
SELECT
    s.id,
    -- The IS NOT NULL guard keeps the result aligned with COUNT(DISTINCT ...),
    -- which ignores NULL payer names, without dropping ids that only have NULLs.
    COUNT_IF(s.join_mon = '2021-03-01' AND s.platform_payer_name IS NOT NULL) AS "2021-03-01",
    COUNT_IF(s.join_mon = '2021-02-01' AND s.platform_payer_name IS NOT NULL) AS "2021-02-01",
    COUNT_IF(s.join_mon = '2021-01-01' AND s.platform_payer_name IS NOT NULL) AS "2021-01-01",
    COUNT_IF(s.join_mon = '2020-12-01' AND s.platform_payer_name IS NOT NULL) AS "2020-12-01"
FROM (
    SELECT DISTINCT
        id,
        platform_payer_name,
        join_mon
    FROM cte3
) AS s
GROUP BY s.id
ORDER BY s.id;