Google Bigquery 上的聚合聚合

Question

我的数据是这样的

-- Inline sample data: seven rows that all share one date and one article_id.
-- Only the first STRUCT names its fields; the remaining tuples reuse them.
WITH test AS (
  SELECT *
  FROM UNNEST([
    STRUCT(
      '2019-10-26' AS date,
      '1.8025137' AS article_id,
      'Digital Paying' AS user_type,
      'open' AS openmode,
      '123' AS uid
    ),
    ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '523'),
    ('2019-10-26', '1.8025137', 'Anonymous', 'open', '321'),
    ('2019-10-26', '1.8025137', 'Registered', 'close', '231'),
    ('2019-10-26', '1.8025137', 'Registered', 'open', '431'),
    ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '132'),
    ('2019-10-26', '1.8025137', 'Anonymous', 'close', '111')
  ])
),

-- First aggregation level: one row per (date, article_id), with every
-- (user_type, openmode, uid) triple collected into the array "ut".
date_article AS (
  SELECT
    date,
    article_id,
    ARRAY_AGG(STRUCT(user_type, openmode, uid)) AS ut
  FROM test
  GROUP BY date, article_id
)

-- Second level: the correlated ARRAY subquery unnests "ut" and regroups its
-- elements by user_type, so each entry of "user_types" carries its own
-- array "op" of (openmode, uid) pairs.
SELECT
  date,
  article_id,
  ARRAY(
    SELECT AS STRUCT
      user_type,
      ARRAY_AGG(STRUCT(openmode, uid)) AS op
    FROM UNNEST(ut)
    GROUP BY user_type
  ) AS user_types
FROM date_article

我的目标是汇总 user_types.op.openmode 和 user_types.op.uid user_types.user_type 不创建任何重复项：

Answer 1

你把事情搞得有点复杂了。如果可能的话，先做你的'normal' SQL，然后再格式化成arrays/structs。

-- Inline sample data: seven rows that all share one date and one article_id.
-- Only the first STRUCT names its fields; the remaining tuples reuse them.
WITH test AS (
  SELECT *
  FROM UNNEST([
    STRUCT('2019-10-26' AS date, '1.8025137' AS article_id, 'Digital Paying' AS user_type, 'open' AS openmode, '123' AS uid),
    ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '523'),
    ('2019-10-26', '1.8025137', 'Anonymous', 'open', '321'),
    ('2019-10-26', '1.8025137', 'Registered', 'close', '231'),
    ('2019-10-26', '1.8025137', 'Registered', 'open', '431'),
    ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '132'),
    ('2019-10-26', '1.8025137', 'Anonymous', 'close', '111')
  ])
),

-- Step 1 (plain SQL): number of distinct uids for every
-- (date, article_id, user_type, openmode) combination.
-- COUNT(DISTINCT ...) counts a repeated uid only once within its group.
agg AS (
  SELECT
    date,
    article_id,
    user_type,
    openmode,
    COUNT(DISTINCT uid) AS uids
  FROM test
  GROUP BY date, article_id, user_type, openmode
),

-- Step 2 (formatting): fold the per-openmode counts into one array per
-- (date, article_id, user_type). The ORDER BY inside ARRAY_AGG fixes the
-- element order, which is otherwise non-deterministic.
final AS (
  SELECT
    date,
    article_id,
    user_type,
    ARRAY_AGG(STRUCT(openmode, uids) ORDER BY openmode) AS subfields
  FROM agg
  GROUP BY date, article_id, user_type
)

-- Explicit column list and row ordering keep the output stable across runs.
SELECT
  date,
  article_id,
  user_type,
  subfields
FROM final
ORDER BY date, article_id, user_type

Answer 2

我想你正在寻找以下

#standardSQL
-- Inline sample data: seven rows that all share one DATE value and one
-- article_id. DATE is used here as a column name holding a string literal;
-- it is not a value of the DATE type.
WITH test AS (
  SELECT *
  FROM UNNEST([
    STRUCT('2019-10-26' AS DATE, '1.8025137' AS article_id, 'Digital Paying' AS user_type, 'open' AS openmode, '123' AS uid),
    ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '523'),
    ('2019-10-26', '1.8025137', 'Anonymous', 'open', '321'),
    ('2019-10-26', '1.8025137', 'Registered', 'close', '231'),
    ('2019-10-26', '1.8025137', 'Registered', 'open', '431'),
    ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '132'),
    ('2019-10-26', '1.8025137', 'Anonymous', 'close', '111')
  ])
),

-- Level 1: distinct-uid count per (DATE, article_id, user_type, openmode).
users_agg AS (
  SELECT
    DATE,
    article_id,
    user_type,
    openmode,
    COUNT(DISTINCT uid) AS uids
  FROM test
  GROUP BY DATE, article_id, user_type, openmode
),

-- Level 2: collect the (openmode, uids) pairs of each user_type into "modes".
-- Each ARRAY_AGG below carries an ORDER BY because the element order of an
-- unordered ARRAY_AGG is non-deterministic.
modes_agg AS (
  SELECT
    DATE,
    article_id,
    user_type,
    ARRAY_AGG(STRUCT(openmode, uids) ORDER BY openmode) AS modes
  FROM users_agg
  GROUP BY DATE, article_id, user_type
),

-- Level 3: collect the (user_type, modes) entries of each article into "types".
types_agg AS (
  SELECT
    DATE,
    article_id,
    ARRAY_AGG(STRUCT(user_type, modes) ORDER BY user_type) AS types
  FROM modes_agg
  GROUP BY DATE, article_id
),

-- Level 4: collect the (article_id, types) entries of each DATE into "articles".
article_agg AS (
  SELECT
    DATE,
    ARRAY_AGG(STRUCT(article_id, types) ORDER BY article_id) AS articles
  FROM types_agg
  GROUP BY DATE
)

-- One output row per DATE; explicit columns and ordering keep the result stable.
SELECT
  DATE,
  articles
FROM article_agg
ORDER BY DATE

结果

Google Bigquery 上的聚合聚合

Aggregation of aggregation on Google Bigquery

struct

nested

aggregation

google-bigquery