如何为 BigQuery 标准 SQL case 语句创建多个 'THEN' 子句?
How to create multiple 'THEN' clauses for a BigQuery standard SQL case statement?
我在 BigQuery 上使用标准 SQL 根据现有 table 中的某些条件创建新的 table。我有多个 WHEN 子句来支持这一点(因为我正在检查几个不同的条件)。我现在想要做的是在这些 WHEN 语句中有多个 THEN 子句,因为我打算添加多个列。
具体来说,我想将两个现有文本字段的串联添加为一个字段,然后将三个现有字段的聚合数组添加为一个字段:
-- Asker's original attempt, kept as posted: each THEN tries to return two
-- values (a concatenated name and an array of scores) by joining them with
-- AND. The surrounding text reports that the AND ARRAY_AGG(...) part does
-- not work.
-- NOTE(review): the chained comparison `one_x1 = two_x1 = three_x1` in the
-- first branch is probably not a valid three-way equality test either;
-- confirm against the BigQuery operator documentation.
CASE WHEN
# all three match
one_x1 = two_x1 = three_x1 THEN CONCAT( object1_name, ", ", object2_name, ", ", object3_name ) AND ARRAY_AGG(STRUCT(score_one, score_two, score_three))
# one and two match
WHEN one_x1 = two_x1 THEN CONCAT( object1_name, ", ", object2_name ) AND ARRAY_AGG(STRUCT(score_one, score_two))
# one and three match
WHEN one_x1 = three_x1 THEN CONCAT( object1_name, ", ", object3_name ) AND ARRAY_AGG(STRUCT(score_one, score_three))
# two and three match
WHEN two_x1 = three_x1 THEN CONCAT( object2_name, ", ", object3_name ) AND ARRAY_AGG(STRUCT(score_two, score_three))
ELSE
NULL
END
出问题的是 'AND ARRAY_AGG(STRUCT(xxxxx))' 这一部分;我也试过用逗号来分隔 THEN 子句。
重复相同的 case 语句来单独分隔 THEN 子句是唯一的选择吗?
示例数据:
sample_data
示例数据第 1 行的预期结果:
here
如果您希望在结果集中生成两个单独的列,则需要创建两个 CASE
语句,例如:
-- SELECT-list fragment: one CASE expression per output column.
-- A CASE branch yields exactly one value, so producing two columns means
-- repeating the same WHEN conditions in two CASE expressions.
--
-- field1: comma-separated names of the objects whose x1 keys match.
-- The three-way match is written as two comparisons joined with AND;
-- a chained `a = b = c` is not a three-way equality test.
CASE
    WHEN one_x1 = two_x1 AND one_x1 = three_x1
        THEN CONCAT(object1_name, ", ", object2_name, ", ", object3_name)
    WHEN one_x1 = two_x1
        THEN CONCAT(object1_name, ", ", object2_name)
    WHEN one_x1 = three_x1
        THEN CONCAT(object1_name, ", ", object3_name)
    WHEN two_x1 = three_x1
        THEN CONCAT(object2_name, ", ", object3_name)
    ELSE NULL
END AS field1,
-- field2: scores of the same matching objects, as a per-row array.
-- Array literals are used instead of ARRAY_AGG(STRUCT(...)): ARRAY_AGG is an
-- aggregate over rows, and structs with different field counts would give the
-- CASE branches incompatible types.
CASE
    WHEN one_x1 = two_x1 AND one_x1 = three_x1
        THEN [score_one, score_two, score_three]
    WHEN one_x1 = two_x1
        THEN [score_one, score_two]
    WHEN one_x1 = three_x1
        THEN [score_one, score_three]
    WHEN two_x1 = three_x1
        THEN [score_two, score_three]
    ELSE NULL
END AS field2
以下适用于 BigQuery 标准 SQL
首先,让我们更正您的初始查询,使其实际产生预期结果
#standardSQL
-- Returns, per id, the joined names (field1) and the scores (field2) of the
-- objects whose x1 keys are equal. The pairwise comparisons are named once
-- in a CTE and reused by both CASE expressions.
WITH compared AS (
    SELECT
        id,
        object1_name,
        object2_name,
        object3_name,
        score_one,
        score_two,
        score_three,
        one_x1 = two_x1 AS one_eq_two,
        one_x1 = three_x1 AS one_eq_three,
        two_x1 = three_x1 AS two_eq_three
    FROM `project.dataset.table`
)
SELECT
    id,
    -- Branch order matters: the three-way match must be tested first.
    CASE
        WHEN one_eq_two AND one_eq_three
            THEN CONCAT(object1_name, ", ", object2_name, ", ", object3_name)
        WHEN one_eq_two
            THEN CONCAT(object1_name, ", ", object2_name)
        WHEN one_eq_three
            THEN CONCAT(object1_name, ", ", object3_name)
        WHEN two_eq_three
            THEN CONCAT(object2_name, ", ", object3_name)
        ELSE NULL
    END AS field1,
    CASE
        WHEN one_eq_two AND one_eq_three
            THEN [score_one, score_two, score_three]
        WHEN one_eq_two
            THEN [score_one, score_two]
        WHEN one_eq_three
            THEN [score_one, score_three]
        WHEN two_eq_three
            THEN [score_two, score_three]
        ELSE NULL
    END AS field2
FROM compared
如果应用于您问题中的示例数据 - 结果是
Row id field1 field2
1 1 Dog, Animal 0.82
0.72
2 2 Horse, Animal, Bird 0.76
0.73
0.9
3 3 Dog, Animal, Chicken 0.67
0.75
0.65
4 4 Bird, Chicken 0.87
0.86
接下来,据我理解,您希望避免在 CASE 中反复书写相同的条件——为此,您可以使用以下技巧:
#standardSQL
-- Evaluates the match conditions once: each CASE branch returns a STRUCT
-- holding both output values, and the outer query unpacks that struct into
-- the two columns field1 and field2.
WITH matched AS (
    SELECT
        id,
        CASE
            WHEN one_x1 = two_x1 AND one_x1 = three_x1
                THEN STRUCT(
                    CONCAT(object1_name, ", ", object2_name, ", ", object3_name) AS field1,
                    [score_one, score_two, score_three] AS field2
                )
            WHEN one_x1 = two_x1
                THEN STRUCT(
                    CONCAT(object1_name, ", ", object2_name) AS field1,
                    [score_one, score_two] AS field2
                )
            WHEN one_x1 = three_x1
                THEN STRUCT(
                    CONCAT(object1_name, ", ", object3_name) AS field1,
                    [score_one, score_three] AS field2
                )
            WHEN two_x1 = three_x1
                THEN STRUCT(
                    CONCAT(object2_name, ", ", object3_name) AS field1,
                    [score_two, score_three] AS field2
                )
            ELSE NULL
        END AS fields
    FROM `project.dataset.table`
)
SELECT
    id,
    fields.field1 AS field1,
    fields.field2 AS field2
FROM matched
显然,输出结果是相同的……
最后,还有另一种选择——您可以用下面的方法完全去掉这些 CASE/WHEN/THEN:
#standardSQL
-- CASE-free variant: finds the positions of the x1 keys that occur more than
-- once, then collects the object names and scores at those positions.
-- Output columns: id, field1 (names joined with ", "), field2 (score array).
WITH candidates AS (
    SELECT
        id,
        [object1_name, object2_name, object3_name] AS objects,
        [score_one, score_two, score_three] AS scores,
        -- 0-based positions of the x1 value shared by at least two objects.
        -- With three inputs at most one group can pass the HAVING filter, so
        -- this scalar subquery returns at most one row; it is NULL when no
        -- keys match.
        (
            SELECT ARRAY_AGG(x_pos)
            FROM UNNEST([one_x1, two_x1, three_x1]) AS x WITH OFFSET AS x_pos
            -- GROUP BY would put NULL keys in one group; `=` never matches
            -- NULLs, so drop them to stay consistent with the CASE versions.
            WHERE x IS NOT NULL
            GROUP BY x
            HAVING COUNT(1) > 1
        ) AS pos
    FROM `project.dataset.table`
)
SELECT
    id,
    -- Explicit ", " delimiter (STRING_AGG defaults to ",") and ORDER BY so
    -- the names come out in object1/object2/object3 order.
    -- NOTE: STRING_AGG skips NULL names, whereas CONCAT would return NULL.
    (
        SELECT STRING_AGG(object_name, ", " ORDER BY object_pos)
        FROM UNNEST(objects) AS object_name WITH OFFSET AS object_pos
        INNER JOIN UNNEST(pos) AS matched_pos
            ON object_pos = matched_pos
    ) AS field1,
    -- ORDER BY keeps the scores aligned with the names in field1.
    (
        SELECT ARRAY_AGG(score ORDER BY score_pos)
        FROM UNNEST(scores) AS score WITH OFFSET AS score_pos
        INNER JOIN UNNEST(pos) AS matched_pos
            ON score_pos = matched_pos
    ) AS field2
FROM candidates
输出结果同样相同。
我在 BigQuery 上使用标准 SQL 根据现有 table 中的某些条件创建新的 table。我有多个 WHEN 子句来支持这一点(因为我正在检查几个不同的条件)。我现在想要做的是在这些 WHEN 语句中有多个 THEN 子句,因为我打算添加多个列。
具体来说,我想将两个现有文本字段的串联添加为一个字段,然后将三个现有字段的聚合数组添加为一个字段:
-- Asker's original attempt, kept as posted: each THEN tries to return two
-- values (a concatenated name and an array of scores) by joining them with
-- AND. The surrounding text reports that the AND ARRAY_AGG(...) part does
-- not work.
-- NOTE(review): the chained comparison `one_x1 = two_x1 = three_x1` in the
-- first branch is probably not a valid three-way equality test either;
-- confirm against the BigQuery operator documentation.
CASE WHEN
# all three match
one_x1 = two_x1 = three_x1 THEN CONCAT( object1_name, ", ", object2_name, ", ", object3_name ) AND ARRAY_AGG(STRUCT(score_one, score_two, score_three))
# one and two match
WHEN one_x1 = two_x1 THEN CONCAT( object1_name, ", ", object2_name ) AND ARRAY_AGG(STRUCT(score_one, score_two))
# one and three match
WHEN one_x1 = three_x1 THEN CONCAT( object1_name, ", ", object3_name ) AND ARRAY_AGG(STRUCT(score_one, score_three))
# two and three match
WHEN two_x1 = three_x1 THEN CONCAT( object2_name, ", ", object3_name ) AND ARRAY_AGG(STRUCT(score_two, score_three))
ELSE
NULL
END
出问题的是 'AND ARRAY_AGG(STRUCT(xxxxx))' 这一部分;我也试过用逗号来分隔 THEN 子句。
重复相同的 case 语句来单独分隔 THEN 子句是唯一的选择吗?
示例数据: sample_data 示例数据第 1 行的预期结果: here
如果您希望在结果集中生成两个单独的列,则需要创建两个 CASE
语句,例如:
-- SELECT-list fragment: one CASE expression per output column.
-- A CASE branch yields exactly one value, so producing two columns means
-- repeating the same WHEN conditions in two CASE expressions.
--
-- field1: comma-separated names of the objects whose x1 keys match.
-- The three-way match is written as two comparisons joined with AND;
-- a chained `a = b = c` is not a three-way equality test.
CASE
    WHEN one_x1 = two_x1 AND one_x1 = three_x1
        THEN CONCAT(object1_name, ", ", object2_name, ", ", object3_name)
    WHEN one_x1 = two_x1
        THEN CONCAT(object1_name, ", ", object2_name)
    WHEN one_x1 = three_x1
        THEN CONCAT(object1_name, ", ", object3_name)
    WHEN two_x1 = three_x1
        THEN CONCAT(object2_name, ", ", object3_name)
    ELSE NULL
END AS field1,
-- field2: scores of the same matching objects, as a per-row array.
-- Array literals are used instead of ARRAY_AGG(STRUCT(...)): ARRAY_AGG is an
-- aggregate over rows, and structs with different field counts would give the
-- CASE branches incompatible types.
CASE
    WHEN one_x1 = two_x1 AND one_x1 = three_x1
        THEN [score_one, score_two, score_three]
    WHEN one_x1 = two_x1
        THEN [score_one, score_two]
    WHEN one_x1 = three_x1
        THEN [score_one, score_three]
    WHEN two_x1 = three_x1
        THEN [score_two, score_three]
    ELSE NULL
END AS field2
以下适用于 BigQuery 标准 SQL
首先,让我们更正您的初始查询,使其实际产生预期结果
#standardSQL
-- Returns, per id, the joined names (field1) and the scores (field2) of the
-- objects whose x1 keys are equal. The pairwise comparisons are named once
-- in a CTE and reused by both CASE expressions.
WITH compared AS (
    SELECT
        id,
        object1_name,
        object2_name,
        object3_name,
        score_one,
        score_two,
        score_three,
        one_x1 = two_x1 AS one_eq_two,
        one_x1 = three_x1 AS one_eq_three,
        two_x1 = three_x1 AS two_eq_three
    FROM `project.dataset.table`
)
SELECT
    id,
    -- Branch order matters: the three-way match must be tested first.
    CASE
        WHEN one_eq_two AND one_eq_three
            THEN CONCAT(object1_name, ", ", object2_name, ", ", object3_name)
        WHEN one_eq_two
            THEN CONCAT(object1_name, ", ", object2_name)
        WHEN one_eq_three
            THEN CONCAT(object1_name, ", ", object3_name)
        WHEN two_eq_three
            THEN CONCAT(object2_name, ", ", object3_name)
        ELSE NULL
    END AS field1,
    CASE
        WHEN one_eq_two AND one_eq_three
            THEN [score_one, score_two, score_three]
        WHEN one_eq_two
            THEN [score_one, score_two]
        WHEN one_eq_three
            THEN [score_one, score_three]
        WHEN two_eq_three
            THEN [score_two, score_three]
        ELSE NULL
    END AS field2
FROM compared
如果应用于您问题中的示例数据 - 结果是
Row id field1 field2
1 1 Dog, Animal 0.82
0.72
2 2 Horse, Animal, Bird 0.76
0.73
0.9
3 3 Dog, Animal, Chicken 0.67
0.75
0.65
4 4 Bird, Chicken 0.87
0.86
接下来,据我理解,您希望避免在 CASE 中反复书写相同的条件——为此,您可以使用以下技巧:
#standardSQL
-- Evaluates the match conditions once: each CASE branch returns a STRUCT
-- holding both output values, and the outer query unpacks that struct into
-- the two columns field1 and field2.
WITH matched AS (
    SELECT
        id,
        CASE
            WHEN one_x1 = two_x1 AND one_x1 = three_x1
                THEN STRUCT(
                    CONCAT(object1_name, ", ", object2_name, ", ", object3_name) AS field1,
                    [score_one, score_two, score_three] AS field2
                )
            WHEN one_x1 = two_x1
                THEN STRUCT(
                    CONCAT(object1_name, ", ", object2_name) AS field1,
                    [score_one, score_two] AS field2
                )
            WHEN one_x1 = three_x1
                THEN STRUCT(
                    CONCAT(object1_name, ", ", object3_name) AS field1,
                    [score_one, score_three] AS field2
                )
            WHEN two_x1 = three_x1
                THEN STRUCT(
                    CONCAT(object2_name, ", ", object3_name) AS field1,
                    [score_two, score_three] AS field2
                )
            ELSE NULL
        END AS fields
    FROM `project.dataset.table`
)
SELECT
    id,
    fields.field1 AS field1,
    fields.field2 AS field2
FROM matched
显然,输出结果是相同的……
最后,还有另一种选择——您可以用下面的方法完全去掉这些 CASE/WHEN/THEN:
#standardSQL
-- CASE-free variant: finds the positions of the x1 keys that occur more than
-- once, then collects the object names and scores at those positions.
-- Output columns: id, field1 (names joined with ", "), field2 (score array).
WITH candidates AS (
    SELECT
        id,
        [object1_name, object2_name, object3_name] AS objects,
        [score_one, score_two, score_three] AS scores,
        -- 0-based positions of the x1 value shared by at least two objects.
        -- With three inputs at most one group can pass the HAVING filter, so
        -- this scalar subquery returns at most one row; it is NULL when no
        -- keys match.
        (
            SELECT ARRAY_AGG(x_pos)
            FROM UNNEST([one_x1, two_x1, three_x1]) AS x WITH OFFSET AS x_pos
            -- GROUP BY would put NULL keys in one group; `=` never matches
            -- NULLs, so drop them to stay consistent with the CASE versions.
            WHERE x IS NOT NULL
            GROUP BY x
            HAVING COUNT(1) > 1
        ) AS pos
    FROM `project.dataset.table`
)
SELECT
    id,
    -- Explicit ", " delimiter (STRING_AGG defaults to ",") and ORDER BY so
    -- the names come out in object1/object2/object3 order.
    -- NOTE: STRING_AGG skips NULL names, whereas CONCAT would return NULL.
    (
        SELECT STRING_AGG(object_name, ", " ORDER BY object_pos)
        FROM UNNEST(objects) AS object_name WITH OFFSET AS object_pos
        INNER JOIN UNNEST(pos) AS matched_pos
            ON object_pos = matched_pos
    ) AS field1,
    -- ORDER BY keeps the scores aligned with the names in field1.
    (
        SELECT ARRAY_AGG(score ORDER BY score_pos)
        FROM UNNEST(scores) AS score WITH OFFSET AS score_pos
        INNER JOIN UNNEST(pos) AS matched_pos
            ON score_pos = matched_pos
    ) AS field2
FROM candidates
输出结果同样相同。