Google Bigquery 上的聚合聚合
Aggregation of aggregation on Google Bigquery
我的数据是这样的
-- Inline sample data: one row per (date, article_id, user_type, openmode, uid).
WITH test AS (
    SELECT *
    FROM UNNEST([
        STRUCT(
            '2019-10-26' AS date,
            '1.8025137' AS article_id,
            'Digital Paying' AS user_type,
            'open' AS openmode,
            '123' AS uid
        ),
        ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '523'),
        ('2019-10-26', '1.8025137', 'Anonymous', 'open', '321'),
        ('2019-10-26', '1.8025137', 'Registered', 'close', '231'),
        ('2019-10-26', '1.8025137', 'Registered', 'open', '431'),
        ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '132'),
        ('2019-10-26', '1.8025137', 'Anonymous', 'close', '111')
    ])
),

-- Step 1: collapse to one row per (date, article_id) and keep every source
-- row as an element of the array "events".
events_per_article AS (
    SELECT
        date,
        article_id,
        ARRAY_AGG(STRUCT(user_type, openmode, uid)) AS events
    FROM test
    GROUP BY date, article_id
)

-- Step 2: within each row, regroup the "events" array by user_type so that
-- each user_type appears once in "user_types" and carries its own array
-- "op" of (openmode, uid) pairs.
SELECT
    date,
    article_id,
    ARRAY(
        SELECT AS STRUCT
            e.user_type,
            ARRAY_AGG(STRUCT(e.openmode, e.uid)) AS op
        FROM UNNEST(events) AS e
        GROUP BY e.user_type
    ) AS user_types
FROM events_per_article
我的目标是汇总 user_types.op.openmode 和 user_types.op.uid
user_types.user_type 不创建任何重复项:
你把事情搞得有点复杂了。如果可能的话,先做你的'normal' SQL,然后再格式化成arrays/structs。
-- Inline sample data: one row per (date, article_id, user_type, openmode, uid).
WITH test AS (
    SELECT *
    FROM UNNEST([
        STRUCT(
            '2019-10-26' AS date,
            '1.8025137' AS article_id,
            'Digital Paying' AS user_type,
            'open' AS openmode,
            '123' AS uid
        ),
        ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '523'),
        ('2019-10-26', '1.8025137', 'Anonymous', 'open', '321'),
        ('2019-10-26', '1.8025137', 'Registered', 'close', '231'),
        ('2019-10-26', '1.8025137', 'Registered', 'open', '431'),
        ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '132'),
        ('2019-10-26', '1.8025137', 'Anonymous', 'close', '111')
    ])
),

-- Flat aggregation first: number of distinct uids per
-- (date, article_id, user_type, openmode).
agg AS (
    SELECT
        date,
        article_id,
        user_type,
        openmode,
        COUNT(DISTINCT uid) AS uids
    FROM test
    GROUP BY date, article_id, user_type, openmode
),

-- Then nest: one row per (date, article_id, user_type) with the per-openmode
-- counts packed into the array "subfields".
final AS (
    SELECT
        date,
        article_id,
        user_type,
        -- ORDER BY inside ARRAY_AGG pins the element order; without it the
        -- order of array elements is unspecified and may vary between runs.
        ARRAY_AGG(STRUCT(openmode, uids) ORDER BY openmode) AS subfields
    FROM agg
    GROUP BY date, article_id, user_type
)

-- Explicit column list and ORDER BY keep the output shape and row order stable.
SELECT
    date,
    article_id,
    user_type,
    subfields
FROM final
ORDER BY date, article_id, user_type
我想你正在寻找以下
#standardSQL
-- Builds a fully nested result, one level of aggregation per CTE:
-- DATE -> articles[] -> types[] -> modes[] (openmode, distinct uid count).
-- Note: DATE is used here as a column name, not as the type keyword.
WITH test AS (
    -- Inline sample data: one row per (DATE, article_id, user_type, openmode, uid).
    SELECT *
    FROM UNNEST([
        STRUCT(
            '2019-10-26' AS DATE,
            '1.8025137' AS article_id,
            'Digital Paying' AS user_type,
            'open' AS openmode,
            '123' AS uid
        ),
        ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '523'),
        ('2019-10-26', '1.8025137', 'Anonymous', 'open', '321'),
        ('2019-10-26', '1.8025137', 'Registered', 'close', '231'),
        ('2019-10-26', '1.8025137', 'Registered', 'open', '431'),
        ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '132'),
        ('2019-10-26', '1.8025137', 'Anonymous', 'close', '111')
    ])
),

-- Level 0: distinct uid count per (DATE, article_id, user_type, openmode).
users_agg AS (
    SELECT
        DATE,
        article_id,
        user_type,
        openmode,
        COUNT(DISTINCT uid) AS uids
    FROM test
    GROUP BY DATE, article_id, user_type, openmode
),

-- Level 1: pack the open modes of each user type into the array "modes".
-- ORDER BY inside each ARRAY_AGG below pins the element order, which is
-- otherwise unspecified.
modes_agg AS (
    SELECT
        DATE,
        article_id,
        user_type,
        ARRAY_AGG(STRUCT(openmode, uids) ORDER BY openmode) AS modes
    FROM users_agg
    GROUP BY DATE, article_id, user_type
),

-- Level 2: pack the user types of each article into the array "types".
types_agg AS (
    SELECT
        DATE,
        article_id,
        ARRAY_AGG(STRUCT(user_type, modes) ORDER BY user_type) AS types
    FROM modes_agg
    GROUP BY DATE, article_id
),

-- Level 3: pack the articles of each date into the array "articles".
article_agg AS (
    SELECT
        DATE,
        ARRAY_AGG(STRUCT(article_id, types) ORDER BY article_id) AS articles
    FROM types_agg
    GROUP BY DATE
)

-- Explicit column list and ORDER BY keep the output shape and row order stable.
SELECT
    DATE,
    articles
FROM article_agg
ORDER BY DATE
结果
我的数据是这样的
-- Inline sample data: one row per (date, article_id, user_type, openmode, uid).
WITH test AS (
    SELECT *
    FROM UNNEST([
        STRUCT(
            '2019-10-26' AS date,
            '1.8025137' AS article_id,
            'Digital Paying' AS user_type,
            'open' AS openmode,
            '123' AS uid
        ),
        ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '523'),
        ('2019-10-26', '1.8025137', 'Anonymous', 'open', '321'),
        ('2019-10-26', '1.8025137', 'Registered', 'close', '231'),
        ('2019-10-26', '1.8025137', 'Registered', 'open', '431'),
        ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '132'),
        ('2019-10-26', '1.8025137', 'Anonymous', 'close', '111')
    ])
),

-- Step 1: collapse to one row per (date, article_id) and keep every source
-- row as an element of the array "events".
events_per_article AS (
    SELECT
        date,
        article_id,
        ARRAY_AGG(STRUCT(user_type, openmode, uid)) AS events
    FROM test
    GROUP BY date, article_id
)

-- Step 2: within each row, regroup the "events" array by user_type so that
-- each user_type appears once in "user_types" and carries its own array
-- "op" of (openmode, uid) pairs.
SELECT
    date,
    article_id,
    ARRAY(
        SELECT AS STRUCT
            e.user_type,
            ARRAY_AGG(STRUCT(e.openmode, e.uid)) AS op
        FROM UNNEST(events) AS e
        GROUP BY e.user_type
    ) AS user_types
FROM events_per_article
我的目标是汇总 user_types.op.openmode 和 user_types.op.uid user_types.user_type 不创建任何重复项:
你把事情搞得有点复杂了。如果可能的话,先做你的'normal' SQL,然后再格式化成arrays/structs。
-- Inline sample data: one row per (date, article_id, user_type, openmode, uid).
WITH test AS (
    SELECT *
    FROM UNNEST([
        STRUCT(
            '2019-10-26' AS date,
            '1.8025137' AS article_id,
            'Digital Paying' AS user_type,
            'open' AS openmode,
            '123' AS uid
        ),
        ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '523'),
        ('2019-10-26', '1.8025137', 'Anonymous', 'open', '321'),
        ('2019-10-26', '1.8025137', 'Registered', 'close', '231'),
        ('2019-10-26', '1.8025137', 'Registered', 'open', '431'),
        ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '132'),
        ('2019-10-26', '1.8025137', 'Anonymous', 'close', '111')
    ])
),

-- Flat aggregation first: number of distinct uids per
-- (date, article_id, user_type, openmode).
agg AS (
    SELECT
        date,
        article_id,
        user_type,
        openmode,
        COUNT(DISTINCT uid) AS uids
    FROM test
    GROUP BY date, article_id, user_type, openmode
),

-- Then nest: one row per (date, article_id, user_type) with the per-openmode
-- counts packed into the array "subfields".
final AS (
    SELECT
        date,
        article_id,
        user_type,
        -- ORDER BY inside ARRAY_AGG pins the element order; without it the
        -- order of array elements is unspecified and may vary between runs.
        ARRAY_AGG(STRUCT(openmode, uids) ORDER BY openmode) AS subfields
    FROM agg
    GROUP BY date, article_id, user_type
)

-- Explicit column list and ORDER BY keep the output shape and row order stable.
SELECT
    date,
    article_id,
    user_type,
    subfields
FROM final
ORDER BY date, article_id, user_type
我想你正在寻找以下
#standardSQL
-- Builds a fully nested result, one level of aggregation per CTE:
-- DATE -> articles[] -> types[] -> modes[] (openmode, distinct uid count).
-- Note: DATE is used here as a column name, not as the type keyword.
WITH test AS (
    -- Inline sample data: one row per (DATE, article_id, user_type, openmode, uid).
    SELECT *
    FROM UNNEST([
        STRUCT(
            '2019-10-26' AS DATE,
            '1.8025137' AS article_id,
            'Digital Paying' AS user_type,
            'open' AS openmode,
            '123' AS uid
        ),
        ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '523'),
        ('2019-10-26', '1.8025137', 'Anonymous', 'open', '321'),
        ('2019-10-26', '1.8025137', 'Registered', 'close', '231'),
        ('2019-10-26', '1.8025137', 'Registered', 'open', '431'),
        ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '132'),
        ('2019-10-26', '1.8025137', 'Anonymous', 'close', '111')
    ])
),

-- Level 0: distinct uid count per (DATE, article_id, user_type, openmode).
users_agg AS (
    SELECT
        DATE,
        article_id,
        user_type,
        openmode,
        COUNT(DISTINCT uid) AS uids
    FROM test
    GROUP BY DATE, article_id, user_type, openmode
),

-- Level 1: pack the open modes of each user type into the array "modes".
-- ORDER BY inside each ARRAY_AGG below pins the element order, which is
-- otherwise unspecified.
modes_agg AS (
    SELECT
        DATE,
        article_id,
        user_type,
        ARRAY_AGG(STRUCT(openmode, uids) ORDER BY openmode) AS modes
    FROM users_agg
    GROUP BY DATE, article_id, user_type
),

-- Level 2: pack the user types of each article into the array "types".
types_agg AS (
    SELECT
        DATE,
        article_id,
        ARRAY_AGG(STRUCT(user_type, modes) ORDER BY user_type) AS types
    FROM modes_agg
    GROUP BY DATE, article_id
),

-- Level 3: pack the articles of each date into the array "articles".
article_agg AS (
    SELECT
        DATE,
        ARRAY_AGG(STRUCT(article_id, types) ORDER BY article_id) AS articles
    FROM types_agg
    GROUP BY DATE
)

-- Explicit column list and ORDER BY keep the output shape and row order stable.
SELECT
    DATE,
    articles
FROM article_agg
ORDER BY DATE
结果