Google BigQuery:Table 使用 REPEATED RECORD 值加入
Google BigQuery: Table join with REPEATED RECORD values
我有一个 table 这样的:
Row field1 field2 field3.a field3.b
1 value1 1 id1 key5
id2 key6
2 value2 2 id3 key7
id4 key8
前两列是标准字段,第三列是重复记录字段。
我还有 2 个 tables
Row id value
1 id1 my-valueA
2 id2 my-valueB
3 id3 my-valueC
4 id4 my-valueD
和
Row id value
1 key5 my-valueE
2 key6 my-valueF
3 key7 my-valueG
4 key8 my-valueH
从第一个 table 映射 id/key (我使用术语 key
和 id
以避免混淆,但最后的概念是相同)与给定值
这是重现结构的完整语句
#standardSQL
WITH my_table AS (
SELECT "value1" as field1, 1 as field2, [STRUCT("id1" as a, "key5" as b),STRUCT("id2" as a, "key6" as b)] as field3
UNION ALL
SELECT "value2" as field2, 2 as field2, [STRUCT("id3" as a, "key7" as b),STRUCT("id4" as a, "key8" as b)] as field3
),
ids_table AS (
SELECT "id1" as id, "my-valueA" as value
UNION ALL
SELECT "id2" as id, "my-valueB" as value
UNION ALL
SELECT "id3" as id, "my-valueC" as value
UNION ALL
SELECT "id4" as id, "my-valueD" as value
),
keys_table AS (
SELECT "key5" as id, "my-valueE" as value
UNION ALL
SELECT "key6" as id, "my-valueF" as value
UNION ALL
SELECT "key7" as id, "my-valueG" as value
UNION ALL
SELECT "key8" as id, "my-valueH" as value
)
-- SELECT * FROM my_table
-- SELECT * FROM ids_table
-- SELECT * FROM keys_table
我的目标是将第一个 table 中的 key/id 值替换为其他 2 个 table 给出的值,就像对 id 的经典连接。
这是预期的输出
Row field1 field2 t2_value t3_value
1 value1 1 my-valueA my-valueE
my-valueB my-valueF
2 value2 2 my-valueC my-valueG
my-valueD my-valueH
起初我考虑使用 UNNEST
运算符来获取扁平线,因此可以进行简单的 JOIN 来解析值,然后用替换值重新加入数组。
SELECT my_table.* EXCEPT(field3),
t2.value as t2_value,
t3.value as t3_value
FROM my_table CROSS JOIN UNNEST(my_table.field3) AS t1
LEFT JOIN ids_table AS t2 ON t1.a = t2.id
LEFT JOIN keys_table as t3 ON t1.b = t3.id
使用此语句,值已从 id 正确替换为值,但现在我无法重现之前的 RECORD REPEATED 结构
以下适用于 BigQuery 标准 SQL
#standardSQL
WITH my_table AS (
SELECT "value1" AS field1, 1 AS field2, [STRUCT("id1" AS a, "key5" AS b),STRUCT("id2" AS a, "key6" AS b)] AS field3 UNION ALL
SELECT "value2" AS field2, 2 AS field2, [STRUCT("id3" AS a, "key7" AS b),STRUCT("id4" AS a, "key8" AS b)] AS field3
), ids_table AS (
SELECT "id1" AS id, "my-valueA" AS value UNION ALL
SELECT "id2" AS id, "my-valueB" AS value UNION ALL
SELECT "id3" AS id, "my-valueC" AS value UNION ALL
SELECT "id4" AS id, "my-valueD" AS value
),keys_table AS (
SELECT "key5" AS id, "my-valueE" AS value UNION ALL
SELECT "key6" AS id, "my-valueF" AS value UNION ALL
SELECT "key7" AS id, "my-valueG" AS value UNION ALL
SELECT "key8" AS id, "my-valueH" AS value
)
SELECT
field1, field2,
(
SELECT ARRAY_AGG(STRUCT<a_value STRING, b_value STRING>(t2.value, t3.value))
FROM UNNEST(field3) t1
LEFT JOIN ids_table AS t2 ON t1.a = t2.id
LEFT JOIN keys_table AS t3 ON t1.b = t3.id
) AS field3
FROM my_table
输出如下
field1 field2 field3.a_value field3.b_value
value1 1 my-valueA my-valueE
my-valueB my-valueF
value2 2 my-valueC my-valueG
my-valueD my-valueH
我有一个 table 这样的:
Row field1 field2 field3.a field3.b
1 value1 1 id1 key5
id2 key6
2 value2 2 id3 key7
id4 key8
前两列是标准字段,第三列是重复记录字段。
我还有 2 个 tables
Row id value
1 id1 my-valueA
2 id2 my-valueB
3 id3 my-valueC
4 id4 my-valueD
和
Row id value
1 key5 my-valueE
2 key6 my-valueF
3 key7 my-valueG
4 key8 my-valueH
从第一个 table 映射 id/key (我使用术语 key
和 id
以避免混淆,但最后的概念是相同)与给定值
这是重现结构的完整语句
#standardSQL
WITH my_table AS (
SELECT "value1" as field1, 1 as field2, [STRUCT("id1" as a, "key5" as b),STRUCT("id2" as a, "key6" as b)] as field3
UNION ALL
SELECT "value2" as field2, 2 as field2, [STRUCT("id3" as a, "key7" as b),STRUCT("id4" as a, "key8" as b)] as field3
),
ids_table AS (
SELECT "id1" as id, "my-valueA" as value
UNION ALL
SELECT "id2" as id, "my-valueB" as value
UNION ALL
SELECT "id3" as id, "my-valueC" as value
UNION ALL
SELECT "id4" as id, "my-valueD" as value
),
keys_table AS (
SELECT "key5" as id, "my-valueE" as value
UNION ALL
SELECT "key6" as id, "my-valueF" as value
UNION ALL
SELECT "key7" as id, "my-valueG" as value
UNION ALL
SELECT "key8" as id, "my-valueH" as value
)
-- SELECT * FROM my_table
-- SELECT * FROM ids_table
-- SELECT * FROM keys_table
我的目标是将第一个 table 中的 key/id 值替换为其他 2 个 table 给出的值,就像对 id 的经典连接。
这是预期的输出
Row field1 field2 t2_value t3_value
1 value1 1 my-valueA my-valueE
my-valueB my-valueF
2 value2 2 my-valueC my-valueG
my-valueD my-valueH
起初我考虑使用 UNNEST
运算符来获取扁平线,因此可以进行简单的 JOIN 来解析值,然后用替换值重新加入数组。
SELECT my_table.* EXCEPT(field3),
t2.value as t2_value,
t3.value as t3_value
FROM my_table CROSS JOIN UNNEST(my_table.field3) AS t1
LEFT JOIN ids_table AS t2 ON t1.a = t2.id
LEFT JOIN keys_table as t3 ON t1.b = t3.id
使用此语句,值已从 id 正确替换为值,但现在我无法重现之前的 RECORD REPEATED 结构
以下适用于 BigQuery 标准 SQL
#standardSQL
WITH my_table AS (
SELECT "value1" AS field1, 1 AS field2, [STRUCT("id1" AS a, "key5" AS b),STRUCT("id2" AS a, "key6" AS b)] AS field3 UNION ALL
SELECT "value2" AS field2, 2 AS field2, [STRUCT("id3" AS a, "key7" AS b),STRUCT("id4" AS a, "key8" AS b)] AS field3
), ids_table AS (
SELECT "id1" AS id, "my-valueA" AS value UNION ALL
SELECT "id2" AS id, "my-valueB" AS value UNION ALL
SELECT "id3" AS id, "my-valueC" AS value UNION ALL
SELECT "id4" AS id, "my-valueD" AS value
),keys_table AS (
SELECT "key5" AS id, "my-valueE" AS value UNION ALL
SELECT "key6" AS id, "my-valueF" AS value UNION ALL
SELECT "key7" AS id, "my-valueG" AS value UNION ALL
SELECT "key8" AS id, "my-valueH" AS value
)
SELECT
field1, field2,
(
SELECT ARRAY_AGG(STRUCT<a_value STRING, b_value STRING>(t2.value, t3.value))
FROM UNNEST(field3) t1
LEFT JOIN ids_table AS t2 ON t1.a = t2.id
LEFT JOIN keys_table AS t3 ON t1.b = t3.id
) AS field3
FROM my_table
输出如下
field1 field2 field3.a_value field3.b_value
value1 1 my-valueA my-valueE
my-valueB my-valueF
value2 2 my-valueC my-valueG
my-valueD my-valueH