如何使用 PIVOT 在 BigQuery 中进行数据透视?
How to pivot in BigQuery using PIVOT?
我想在 BigQuery 中把行转换为列(数据透视)。
这就是我的数据现在的样子:
这就是我希望我的数据的样子:
PS:虽然我在 SUB_CLASS_DESC 列中只展示了 3 个值,但实际的取值有数百个。因此,我希望按照文档使用过程语言(procedural language)。我参照 towardsdatascience.com 上分享的示例编写了以下代码,但不幸的是,它无法运行:
-- Script variable that will hold the PIVOT IN-list as text.
-- NOTE(review): BigQuery requires DECLARE to precede every other statement in
-- the script or block — confirm this is the first statement of the full script.
DECLARE DEPT_CLASS_SUB_CLASS STRING;
-- Collect the distinct DEPT_CLASS_SUB_CLASS values into one string shaped like
-- ("value1", "value2", ...): a parenthesised list of double-quoted literals.
SET DEPT_CLASS_SUB_CLASS = (SELECT CONCAT('("', STRING_AGG(DISTINCT DEPT_CLASS_SUB_CLASS, '", "'), '")')
FROM `analytics-mkt-cleanroom.Workspace.HS_AF_SG_R12_800K_SAMPLE_SALES_11_TEST`
);
-- Splice the list into the statement text at the %s placeholder and run the
-- result, which (re)creates the pivoted table from HH_ID / DEPT_CLASS_SUB_CLASS /
-- SALE_AMT rows.
-- NOTE(review): the comma in front of FOR inside the PIVOT clause does not match
-- BigQuery's PIVOT grammar (aggregate FOR column IN (...)) and looks like a
-- syntax error — confirm and remove.
EXECUTE IMMEDIATE FORMAT("""
CREATE OR REPLACE TABLE `analytics-mkt-cleanroom.Workspace.HS_AF_SG_R12_800K_SAMPLE_SALES_PIVOTED_12_TEST` AS
SELECT * FROM
(SELECT HH_ID,DEPT_CLASS_SUB_CLASS,SALE_AMT
FROM `analytics-mkt-cleanroom.Workspace.HS_AF_SG_R12_800K_SAMPLE_SALES_11_TEST`
)
PIVOT
(SUM(SALE_AMT)
,FOR DEPT_CLASS_SUB_CLASS IN %s
)""",DEPT_CLASS_SUB_CLASS);
我得到的错误:
错误信息提示变量应在 EXECUTE 块之前声明,而我确实是这样做的,所以我不明白为什么错误仍然存在。
我尝试过用不同的方式声明变量 DEPT_CLASS_SUB_CLASS,但都没有成功。谁能指出我可能错在哪里?
非常感谢!
考虑以下方法
-- Build the PIVOT statement from the data itself, then run it.
-- The inner query gathers every distinct sub_class_desc (spaces replaced by
-- underscores) into a comma-separated list of single-quoted literals; the
-- outer query splices that list into the IN (...) clause of the statement text.
EXECUTE IMMEDIATE (
  SELECT CONCAT('''
select *
from your_table
pivot (any_value(sale_amt) for replace(sub_class_desc, ' ', '_') in (''', in_list, '''))
''')
  FROM (
    SELECT STRING_AGG(DISTINCT CONCAT("'", REPLACE(sub_class_desc, ' ', '_'), "'")) AS in_list
    FROM your_table
  ) AS distinct_values
)
如果将其应用于您问题中的示例数据,输出如下:
如何将这些结果保存到一个新的透视表中?具体来说,我应该把 CREATE OR REPLACE TABLE 放在哪里?
-- Same dynamic pivot, but the generated statement is a
-- CREATE OR REPLACE TABLE ... AS SELECT, so the pivoted rows are saved to
-- `your_project.your_dataset.pivot_table`.
-- The inner query gathers every distinct sub_class_desc (spaces replaced by
-- underscores) into a comma-separated list of single-quoted literals that is
-- spliced into the IN (...) clause.
EXECUTE IMMEDIATE (
  SELECT CONCAT('''
create or replace table `your_project.your_dataset.pivot_table` as
select *
from your_table
pivot (any_value(sale_amt) for replace(sub_class_desc, ' ', '_') in (''', in_list, '''))
''')
  FROM (
    SELECT STRING_AGG(DISTINCT CONCAT("'", REPLACE(sub_class_desc, ' ', '_'), "'")) AS in_list
    FROM your_table
  ) AS distinct_values
);
DEPT_CLASS_SUB_CLASS 变量的声明(DECLARE 语句)应该放在所有其他语句之前,而不仅仅是放在引用它的 EXECUTE 块之前。
从您的错误消息来看,您似乎是在 [411:1] 处(即第 411 行)声明的变量。请将该声明移到脚本顶部的第 1 行,然后重新测试。
另外,您的 PIVOT 写法也有问题。我写了一个测试查询,它在执行 PIVOT 的同时按字母顺序列出各列。
-- Three in-memory sample rows: (HH_ID, SUB_CLASS_DESC, SALE_AMT).
DECLARE sample_data ARRAY<STRUCT<HH_ID STRING, SUB_CLASS_DESC STRING, SALE_AMT FLOAT64>>
  DEFAULT [
    STRUCT('HHH_001', 'K&B FIXTURE/PLUMBING', 139.),
    STRUCT('HHH_001', 'PULLDOWN KITCHEN FAUCETS', 129.),
    STRUCT('HHH_001', 'TUBULAR REPAIR & REPLACE', 0.)
  ];

-- Materialise the samples as a temp table, replacing every space, '&' and '/'
-- in SUB_CLASS_DESC with '_' (presumably so the pivot values can serve as
-- column names — confirm).
CREATE TEMP TABLE data AS
SELECT
  sample_row.* REPLACE (TRANSLATE(sample_row.SUB_CLASS_DESC, ' &/', '___') AS SUB_CLASS_DESC)
FROM UNNEST(sample_data) AS sample_row;

-- Generate and run the PIVOT. The distinct descriptions are joined with ','
-- in ascending order, and the quotes around %s in the template close the first
-- and last literal, giving an IN list of the form 'a','b','c'.
EXECUTE IMMEDIATE FORMAT(
  """
SELECT *
FROM data
PIVOT (SUM(SALE_AMT) AS sale_amt FOR SUB_CLASS_DESC IN ('%s'));
""",
  (
    SELECT STRING_AGG(DISTINCT SUB_CLASS_DESC, "','" ORDER BY SUB_CLASS_DESC)
    FROM data
  )
);
Query Result
我想在 BigQuery 中把行转换为列(数据透视)。
这就是我的数据现在的样子:
这就是我希望我的数据的样子:
PS:虽然我在 SUB_CLASS_DESC 列中只展示了 3 个值,但实际的取值有数百个。因此,我希望按照文档使用过程语言(procedural language)。我参照 towardsdatascience.com 上分享的示例编写了以下代码,但不幸的是,它无法运行:
-- Script variable that will hold the PIVOT IN-list as text.
-- NOTE(review): BigQuery requires DECLARE to precede every other statement in
-- the script or block — confirm this is the first statement of the full script.
DECLARE DEPT_CLASS_SUB_CLASS STRING;
-- Collect the distinct DEPT_CLASS_SUB_CLASS values into one string shaped like
-- ("value1", "value2", ...): a parenthesised list of double-quoted literals.
SET DEPT_CLASS_SUB_CLASS = (SELECT CONCAT('("', STRING_AGG(DISTINCT DEPT_CLASS_SUB_CLASS, '", "'), '")')
FROM `analytics-mkt-cleanroom.Workspace.HS_AF_SG_R12_800K_SAMPLE_SALES_11_TEST`
);
-- Splice the list into the statement text at the %s placeholder and run the
-- result, which (re)creates the pivoted table from HH_ID / DEPT_CLASS_SUB_CLASS /
-- SALE_AMT rows.
-- NOTE(review): the comma in front of FOR inside the PIVOT clause does not match
-- BigQuery's PIVOT grammar (aggregate FOR column IN (...)) and looks like a
-- syntax error — confirm and remove.
EXECUTE IMMEDIATE FORMAT("""
CREATE OR REPLACE TABLE `analytics-mkt-cleanroom.Workspace.HS_AF_SG_R12_800K_SAMPLE_SALES_PIVOTED_12_TEST` AS
SELECT * FROM
(SELECT HH_ID,DEPT_CLASS_SUB_CLASS,SALE_AMT
FROM `analytics-mkt-cleanroom.Workspace.HS_AF_SG_R12_800K_SAMPLE_SALES_11_TEST`
)
PIVOT
(SUM(SALE_AMT)
,FOR DEPT_CLASS_SUB_CLASS IN %s
)""",DEPT_CLASS_SUB_CLASS);
我得到的错误:
错误信息提示变量应在 EXECUTE 块之前声明,而我确实是这样做的,所以我不明白为什么错误仍然存在。我尝试过用不同的方式声明变量 DEPT_CLASS_SUB_CLASS,但都没有成功。谁能指出我可能错在哪里?
非常感谢!
考虑以下方法
-- Build the PIVOT statement from the data itself, then run it.
-- The inner query gathers every distinct sub_class_desc (spaces replaced by
-- underscores) into a comma-separated list of single-quoted literals; the
-- outer query splices that list into the IN (...) clause of the statement text.
EXECUTE IMMEDIATE (
  SELECT CONCAT('''
select *
from your_table
pivot (any_value(sale_amt) for replace(sub_class_desc, ' ', '_') in (''', in_list, '''))
''')
  FROM (
    SELECT STRING_AGG(DISTINCT CONCAT("'", REPLACE(sub_class_desc, ' ', '_'), "'")) AS in_list
    FROM your_table
  ) AS distinct_values
)
如果将其应用于您问题中的示例数据,输出如下:
如何将这些结果保存到一个新的透视表中?具体来说,我应该把 CREATE OR REPLACE TABLE 放在哪里?
-- Same dynamic pivot, but the generated statement is a
-- CREATE OR REPLACE TABLE ... AS SELECT, so the pivoted rows are saved to
-- `your_project.your_dataset.pivot_table`.
-- The inner query gathers every distinct sub_class_desc (spaces replaced by
-- underscores) into a comma-separated list of single-quoted literals that is
-- spliced into the IN (...) clause.
EXECUTE IMMEDIATE (
  SELECT CONCAT('''
create or replace table `your_project.your_dataset.pivot_table` as
select *
from your_table
pivot (any_value(sale_amt) for replace(sub_class_desc, ' ', '_') in (''', in_list, '''))
''')
  FROM (
    SELECT STRING_AGG(DISTINCT CONCAT("'", REPLACE(sub_class_desc, ' ', '_'), "'")) AS in_list
    FROM your_table
  ) AS distinct_values
);
DEPT_CLASS_SUB_CLASS 变量的声明(DECLARE 语句)应该放在所有其他语句之前,而不仅仅是放在引用它的 EXECUTE 块之前。从您的错误消息来看,您似乎是在 [411:1] 处(即第 411 行)声明的变量。请将该声明移到脚本顶部的第 1 行,然后重新测试。另外,您的 PIVOT 写法也有问题。我写了一个测试查询,它在执行 PIVOT 的同时按字母顺序列出各列。
-- Three in-memory sample rows: (HH_ID, SUB_CLASS_DESC, SALE_AMT).
DECLARE sample_data ARRAY<STRUCT<HH_ID STRING, SUB_CLASS_DESC STRING, SALE_AMT FLOAT64>>
  DEFAULT [
    STRUCT('HHH_001', 'K&B FIXTURE/PLUMBING', 139.),
    STRUCT('HHH_001', 'PULLDOWN KITCHEN FAUCETS', 129.),
    STRUCT('HHH_001', 'TUBULAR REPAIR & REPLACE', 0.)
  ];

-- Materialise the samples as a temp table, replacing every space, '&' and '/'
-- in SUB_CLASS_DESC with '_' (presumably so the pivot values can serve as
-- column names — confirm).
CREATE TEMP TABLE data AS
SELECT
  sample_row.* REPLACE (TRANSLATE(sample_row.SUB_CLASS_DESC, ' &/', '___') AS SUB_CLASS_DESC)
FROM UNNEST(sample_data) AS sample_row;

-- Generate and run the PIVOT. The distinct descriptions are joined with ','
-- in ascending order, and the quotes around %s in the template close the first
-- and last literal, giving an IN list of the form 'a','b','c'.
EXECUTE IMMEDIATE FORMAT(
  """
SELECT *
FROM data
PIVOT (SUM(SALE_AMT) AS sale_amt FOR SUB_CLASS_DESC IN ('%s'));
""",
  (
    SELECT STRING_AGG(DISTINCT SUB_CLASS_DESC, "','" ORDER BY SUB_CLASS_DESC)
    FROM data
  )
);
Query Result