BigQuery 如何获取 JSON 结构中值的总和?
BigQuery How to get the sum of values in a JSON struct?
我有以下查询
-- Return the nested "Weights" object of the sample JSON string in a column named weight.
WITH sample AS (
  SELECT '{"Weights":{"blue":1.0,"purple":0.0,"yellow":1.0,"green":1.0}}' AS json
)
SELECT
  JSON_EXTRACT(json, '$.Weights') AS weight
FROM sample
它返回:
{"blue":1.0,"purple":0.0,"yellow":1.0,"green":1.0}
我想知道有没有办法对所有颜色的值求和,也就是返回:
3.0
我一直在尝试使用 split 和 unnest 函数但没有成功,有什么建议吗?谢谢
下面是一个使用 REGEXP_EXTRACT_ALL 的例子:
-- Sum every value found inside the "Weights" object of each row's JSON string.
WITH T AS (
  SELECT '{"Weights":{"blue":1.0,"purple":0.0,"yellow":1.0,"green":1.0}}' AS json
)
SELECT
  (
    -- The pattern captures the text after each ':' up to the next ',' or '}',
    -- which for a flat object of numbers is the raw text of each value.
    SELECT SUM(CAST(val AS FLOAT64))
    FROM UNNEST(
      REGEXP_EXTRACT_ALL(
        JSON_EXTRACT(json, '$.Weights'),
        r':([^,}]+)')
    ) AS val
  ) AS total  -- explicit alias so the result column gets a stable, readable name
FROM T;
作为另一种可供探索的方案,下面的查询适用于 BigQuery 标准 SQL。
第一个示例为每一行提取 key:value 对:
#standardSQL
-- Explode each row's "Weights" object into one (id, color, value) row per key.
WITH `project.dataset.yourTbale` AS (
  SELECT 1 AS id, '{"Weights":{"blue":1.0,"purple":0.0,"yellow":1.0,"green":1.0}}' AS json
  UNION ALL
  SELECT 2, '{"Weights":{"blue":1.0,"red":2.0,"yellow":1.0,"orange":3.0}}'
),
weight_pairs AS (
  -- Drop the braces around the Weights object, split the remainder on the
  -- default ',' delimiter, then split each '"color":value' element on ':'.
  SELECT
    src.id,
    SPLIT(pair, ':') AS parts
  FROM `project.dataset.yourTbale` AS src
  CROSS JOIN UNNEST(SPLIT(REGEXP_REPLACE(JSON_EXTRACT(src.json, '$.Weights'), r'{|}', ''))) AS pair
)
SELECT
  id,
  REPLACE(parts[OFFSET(0)], '"', '') AS color,   -- key with its double quotes removed
  SAFE_CAST(parts[OFFSET(1)] AS FLOAT64) AS value -- NULL instead of an error if not numeric
FROM weight_pairs
结果如下:
id color value
1 blue 1.0
1 purple 0.0
1 yellow 1.0
1 green 1.0
2 blue 1.0
2 red 2.0
2 yellow 1.0
2 orange 3.0
现在就很容易把上面的查询扩展到原始问题(有没有办法对所有颜色的值求和),
甚至还可以通过按特定颜色过滤来进一步扩展——请参见下面的示例:
#standardSQL
-- Per-id total of the "Weights" values, leaving the 'blue' entry out of the sum.
WITH `project.dataset.yourTbale` AS (
  SELECT 1 AS id, '{"Weights":{"blue":1.0,"purple":0.0,"yellow":1.0,"green":1.0}}' AS json
  UNION ALL
  SELECT 2, '{"Weights":{"blue":1.0,"red":2.0,"yellow":1.0,"orange":3.0}}'
),
weight_pairs AS (
  -- Drop the braces around the Weights object, split the remainder on the
  -- default ',' delimiter, then split each '"color":value' element on ':'.
  SELECT
    src.id,
    SPLIT(pair, ':') AS parts
  FROM `project.dataset.yourTbale` AS src
  CROSS JOIN UNNEST(SPLIT(REGEXP_REPLACE(JSON_EXTRACT(src.json, '$.Weights'), r'{|}', ''))) AS pair
)
SELECT
  id,
  SUM(SAFE_CAST(parts[OFFSET(1)] AS FLOAT64)) AS total
FROM weight_pairs
-- Compare on the key with its double quotes removed.
WHERE REPLACE(parts[OFFSET(0)], '"', '') != 'blue'
GROUP BY id
结果如下(从计算中排除了 color=blue)
id total
1 2.0
2 6.0
我有以下查询
-- Return the nested "Weights" object of the sample JSON string in a column named weight.
WITH sample AS (
  SELECT '{"Weights":{"blue":1.0,"purple":0.0,"yellow":1.0,"green":1.0}}' AS json
)
SELECT
  JSON_EXTRACT(json, '$.Weights') AS weight
FROM sample
它返回:
{"blue":1.0,"purple":0.0,"yellow":1.0,"green":1.0}
我想知道有没有办法对所有颜色的值求和,也就是返回:
3.0
我一直在尝试使用 split 和 unnest 函数但没有成功,有什么建议吗?谢谢
下面是一个使用 REGEXP_EXTRACT_ALL 的例子:
-- Sum every value found inside the "Weights" object of each row's JSON string.
WITH T AS (
  SELECT '{"Weights":{"blue":1.0,"purple":0.0,"yellow":1.0,"green":1.0}}' AS json
)
SELECT
  (
    -- The pattern captures the text after each ':' up to the next ',' or '}',
    -- which for a flat object of numbers is the raw text of each value.
    SELECT SUM(CAST(val AS FLOAT64))
    FROM UNNEST(
      REGEXP_EXTRACT_ALL(
        JSON_EXTRACT(json, '$.Weights'),
        r':([^,}]+)')
    ) AS val
  ) AS total  -- explicit alias so the result column gets a stable, readable name
FROM T;
作为另一种可供探索的方案,下面的查询适用于 BigQuery 标准 SQL。
第一个示例为每一行提取 key:value 对:
#standardSQL
-- Explode each row's "Weights" object into one (id, color, value) row per key.
WITH `project.dataset.yourTbale` AS (
  SELECT 1 AS id, '{"Weights":{"blue":1.0,"purple":0.0,"yellow":1.0,"green":1.0}}' AS json
  UNION ALL
  SELECT 2, '{"Weights":{"blue":1.0,"red":2.0,"yellow":1.0,"orange":3.0}}'
),
weight_pairs AS (
  -- Drop the braces around the Weights object, split the remainder on the
  -- default ',' delimiter, then split each '"color":value' element on ':'.
  SELECT
    src.id,
    SPLIT(pair, ':') AS parts
  FROM `project.dataset.yourTbale` AS src
  CROSS JOIN UNNEST(SPLIT(REGEXP_REPLACE(JSON_EXTRACT(src.json, '$.Weights'), r'{|}', ''))) AS pair
)
SELECT
  id,
  REPLACE(parts[OFFSET(0)], '"', '') AS color,   -- key with its double quotes removed
  SAFE_CAST(parts[OFFSET(1)] AS FLOAT64) AS value -- NULL instead of an error if not numeric
FROM weight_pairs
结果如下:
id color value
1 blue 1.0
1 purple 0.0
1 yellow 1.0
1 green 1.0
2 blue 1.0
2 red 2.0
2 yellow 1.0
2 orange 3.0
现在就很容易把上面的查询扩展到原始问题(有没有办法对所有颜色的值求和),
甚至还可以通过按特定颜色过滤来进一步扩展——请参见下面的示例:
#standardSQL
-- Per-id total of the "Weights" values, leaving the 'blue' entry out of the sum.
WITH `project.dataset.yourTbale` AS (
  SELECT 1 AS id, '{"Weights":{"blue":1.0,"purple":0.0,"yellow":1.0,"green":1.0}}' AS json
  UNION ALL
  SELECT 2, '{"Weights":{"blue":1.0,"red":2.0,"yellow":1.0,"orange":3.0}}'
),
weight_pairs AS (
  -- Drop the braces around the Weights object, split the remainder on the
  -- default ',' delimiter, then split each '"color":value' element on ':'.
  SELECT
    src.id,
    SPLIT(pair, ':') AS parts
  FROM `project.dataset.yourTbale` AS src
  CROSS JOIN UNNEST(SPLIT(REGEXP_REPLACE(JSON_EXTRACT(src.json, '$.Weights'), r'{|}', ''))) AS pair
)
SELECT
  id,
  SUM(SAFE_CAST(parts[OFFSET(1)] AS FLOAT64)) AS total
FROM weight_pairs
-- Compare on the key with its double quotes removed.
WHERE REPLACE(parts[OFFSET(0)], '"', '') != 'blue'
GROUP BY id
结果如下(从计算中排除了 color=blue)
id total
1 2.0
2 6.0