将 bigquery 中的数组与空数组连接起来
Concatenating arrays in bigquery with empty arrays
我有一个 table 看起来像这样:
然后我尝试将每种标签的label1、label2、label3聚合到一个数组中,然后,最后我想将所有非空标签放入一个组合数组中。所以我的查询看起来像这样
#standardSQL
-- Inline sample data: two rows per id; label3 is NULL on every 'xyz' row.
WITH table AS (
  SELECT 'abc' AS id, 1 AS label1, 12 AS label2, 122 AS label3
  UNION ALL SELECT 'abc', 1, 12, 129
  UNION ALL SELECT 'xyz', 2, 23, NULL
  UNION ALL SELECT 'xyz', 2, 24, NULL
),
-- One array per label column for each id, skipping NULL inputs.
each_label_agg AS (
  SELECT
    id,
    ARRAY_AGG(label1 IGNORE NULLS) AS label1_agg,
    ARRAY_AGG(label2 IGNORE NULLS) AS label2_agg,
    ARRAY_AGG(label3 IGNORE NULLS) AS label3_agg
  FROM table
  GROUP BY id
)
-- Concatenate the three per-label arrays into a single array per id.
-- As written, this does not yield [2,2,23,24] for id 'xyz'.
SELECT
  agg.*,
  ARRAY_CONCAT(
    agg.label1_agg,
    agg.label2_agg,
    agg.label3_agg
  ) AS combined_labels
FROM each_label_agg AS agg
输出如下所示:
但是在输出中我期望 combined_labels
是 [2,2,23,24]
for id xyz
.
ignore nulls
参数在 array_concat
中不起作用。我猜测 combined_labels
由于 label3
处的空数组而变得畸形。如何获得 xyz
的预期 combined_labels
为 [2,2,23,24]
?
#standardSQL
-- Builds one combined label array per id, treating a missing per-label array
-- as empty so that it does not turn the whole concatenation into NULL.
WITH table AS (
  SELECT 'abc' id, 1 label1, 12 label2, 122 label3 UNION ALL
  SELECT 'abc', 1, 12, 129 UNION ALL
  SELECT 'xyz', 2, 23, NULL UNION ALL
  SELECT 'xyz', 2, 24, NULL
), each_label_agg AS (
  -- One array per label column for each id. The ORDER BY inside ARRAY_AGG pins
  -- the element order, which is otherwise not guaranteed.
  SELECT
    id,
    ARRAY_AGG(label1 IGNORE NULLS ORDER BY label1) AS label1_agg,
    ARRAY_AGG(label2 IGNORE NULLS ORDER BY label2) AS label2_agg,
    ARRAY_AGG(label3 IGNORE NULLS ORDER BY label3) AS label3_agg
  FROM table
  GROUP BY id
)
SELECT
  each_label_agg.*,
  -- ARRAY_CONCAT returns NULL if any argument is NULL, and label3_agg is NULL
  -- (not an empty array) for 'xyz', so default every input to [] first.
  ARRAY_CONCAT(
    IFNULL(each_label_agg.label1_agg, []),
    IFNULL(each_label_agg.label2_agg, []),
    IFNULL(each_label_agg.label3_agg, [])
  ) AS combined_labels
FROM each_label_agg
-- Keep the row order stable across runs.
ORDER BY id
造成此问题的原因是 BigQuery 有 limitations respect to NULL
values and the array generation will return NULL
if any argument is NULL
as documented here。因此,用它们代替空数组很方便(因为 NULL
s 和空数组在 BigQuery 中是两个不同的值)
我有一个 table 看起来像这样:
然后我尝试将每种标签的label1、label2、label3聚合到一个数组中,然后,最后我想将所有非空标签放入一个组合数组中。所以我的查询看起来像这样
#standardSQL
-- Inline sample data: two rows per id; label3 is NULL on every 'xyz' row.
WITH table AS (
  SELECT 'abc' AS id, 1 AS label1, 12 AS label2, 122 AS label3
  UNION ALL SELECT 'abc', 1, 12, 129
  UNION ALL SELECT 'xyz', 2, 23, NULL
  UNION ALL SELECT 'xyz', 2, 24, NULL
),
-- One array per label column for each id, skipping NULL inputs.
each_label_agg AS (
  SELECT
    id,
    ARRAY_AGG(label1 IGNORE NULLS) AS label1_agg,
    ARRAY_AGG(label2 IGNORE NULLS) AS label2_agg,
    ARRAY_AGG(label3 IGNORE NULLS) AS label3_agg
  FROM table
  GROUP BY id
)
-- Concatenate the three per-label arrays into a single array per id.
-- As written, this does not yield [2,2,23,24] for id 'xyz'.
SELECT
  agg.*,
  ARRAY_CONCAT(
    agg.label1_agg,
    agg.label2_agg,
    agg.label3_agg
  ) AS combined_labels
FROM each_label_agg AS agg
输出如下所示:
但是在输出中我期望 combined_labels
是 [2,2,23,24]
for id xyz
.
ignore nulls
参数在 array_concat
中不起作用。我猜测 combined_labels
由于 label3
处的空数组而变得畸形。如何获得 xyz
的预期 combined_labels
为 [2,2,23,24]
?
#standardSQL
-- Builds one combined label array per id, treating a missing per-label array
-- as empty so that it does not turn the whole concatenation into NULL.
WITH table AS (
  SELECT 'abc' id, 1 label1, 12 label2, 122 label3 UNION ALL
  SELECT 'abc', 1, 12, 129 UNION ALL
  SELECT 'xyz', 2, 23, NULL UNION ALL
  SELECT 'xyz', 2, 24, NULL
), each_label_agg AS (
  -- One array per label column for each id. The ORDER BY inside ARRAY_AGG pins
  -- the element order, which is otherwise not guaranteed.
  SELECT
    id,
    ARRAY_AGG(label1 IGNORE NULLS ORDER BY label1) AS label1_agg,
    ARRAY_AGG(label2 IGNORE NULLS ORDER BY label2) AS label2_agg,
    ARRAY_AGG(label3 IGNORE NULLS ORDER BY label3) AS label3_agg
  FROM table
  GROUP BY id
)
SELECT
  each_label_agg.*,
  -- ARRAY_CONCAT returns NULL if any argument is NULL, and label3_agg is NULL
  -- (not an empty array) for 'xyz', so default every input to [] first.
  ARRAY_CONCAT(
    IFNULL(each_label_agg.label1_agg, []),
    IFNULL(each_label_agg.label2_agg, []),
    IFNULL(each_label_agg.label3_agg, [])
  ) AS combined_labels
FROM each_label_agg
-- Keep the row order stable across runs.
ORDER BY id
造成此问题的原因是 BigQuery 有 limitations respect to NULL
values and the array generation will return NULL
if any argument is NULL
as documented here。因此,用它们代替空数组很方便(因为 NULL
s 和空数组在 BigQuery 中是两个不同的值)