BigQuery SQL JSON:当前行包含多个值时返回额外的行
BigQuery SQL JSON Returning additional rows when current row contains multiple values
我有一个 table 看起来像这样
keyA | data:{"value":false}
keyB | data:{"value":3}
keyC | data:{"value":{"paid":10,"unpaid":20}}
对于 keyA、keyB,我可以很容易地用 JSON_EXTRACT_SCALAR 提取单个值;但是对于 keyC,我想返回多个值并更改键名,因此最终输出如下所示:
keyA | false
keyB | 3
keyC-paid | 10
keyC-unpaid | 20
我知道可以使用 UNNEST 和 JSON_EXTRACT 提取多个值并生成额外的行,但不确定如何将它们组合起来以调整键列的名称。
试试这个:
-- Expands each row's `$.value` into one output row per nested key.
-- Rows whose `$.value` is a scalar produce no key/value pairs, so the
-- fallbacks below emit the bare key and the scalar itself
-- (assumes the bqutil helpers yield nothing to unnest for scalars -- TODO confirm).
WITH sample AS (
  SELECT col, data
  FROM UNNEST([
    STRUCT('keyA' AS col, '{"value":false}' AS data),
    STRUCT('keyB', '{"value":3}'),
    STRUCT('keyC', '{"value":{"paid":10,"unpaid":20}}')
  ])
),
-- Key and value arrays extracted from the same `$.value` fragment.
parsed AS (
  SELECT
    col,
    data,
    `bqutil.fn.json_extract_keys`(JSON_QUERY(data, '$.value')) AS keys,
    `bqutil.fn.json_extract_values`(JSON_QUERY(data, '$.value')) AS vals
  FROM sample
)
SELECT
  -- Append "-<key>" only when a nested key exists.
  CONCAT(p.col, COALESCE(CONCAT('-', key_name), '')) AS col,
  -- Fall back to the scalar at `$.value` when there is no nested value.
  COALESCE(key_value, JSON_VALUE(p.data, '$.value')) AS data
FROM parsed AS p
LEFT JOIN UNNEST(p.keys) AS key_name WITH OFFSET AS key_pos
LEFT JOIN UNNEST(p.vals) AS key_value WITH OFFSET AS value_pos
  ON key_pos = value_pos;
更通用的方法
-- Returns the top-level keys of the JSON text `input`, in JavaScript
-- enumeration order.
-- A SQL NULL or a JSON `null` yields an empty array instead of aborting the
-- query with a TypeError. Malformed JSON still raises.
create temp function extract_keys(input string) returns array<string> language js as """
  const parsed = JSON.parse(input);
  // JSON.parse(null) evaluates to null, and Object.keys(null) throws.
  return parsed === null ? [] : Object.keys(parsed);
""";
-- Returns the top-level values of the JSON text `input`, in the same order
-- that extract_keys returns the keys.
-- A SQL NULL or a JSON `null` yields an empty array instead of aborting the
-- query with a TypeError. Malformed JSON still raises.
create temp function extract_values(input string) returns array<string> language js as """
  const parsed = JSON.parse(input);
  // JSON.parse(null) evaluates to null, and Object.values(null) throws.
  return parsed === null ? [] : Object.values(parsed);
""";
-- Flattens the JSON text `input` into a single-level JSON object (returned as
-- text) whose keys are the dot-joined paths to each leaf, e.g.
--   {"value":{"paid":10}}  ->  {"value.paid":10}
-- Array elements are keyed by their index. Leaves that are null, and empty
-- objects/arrays, contribute no entry to the result.
create temp function extract_all_leaves(input string) returns string language js as '''
  // Depth-first walk that writes every leaf into the shared accumulator `res`.
  function flattenObj(obj, parent = '', res = {}){
    for(let key in obj){
      let propName = parent ? parent + '.' + key : key;
      if(typeof obj[key] === 'object'){
        // Objects, arrays and null all report "object". Recursing into null
        // or an empty container iterates nothing, so no entry is added.
        flattenObj(obj[key], propName, res);
      } else {
        res[propName] = obj[key];
      }
    }
    return res;
  }
  // Serialize once, after the whole tree has been walked, rather than on
  // every recursive return.
  return JSON.stringify(flattenObj(JSON.parse(input)));
''';
-- Emits one row per flattened JSON leaf of `data`.
-- The output key is `col` followed by the leaf path, with the leading "value"
-- segment removed and the remaining dots turned into dashes
-- (e.g. keyC + "value.paid" -> "keyC-paid").
-- Only a leading "value" path segment is stripped, so nested keys that merely
-- contain the text "value" (e.g. "value.total_value") are kept intact.
select
  col || replace(regexp_replace(key, r'^value(\.|$)', r'\1'), '.', '-') as col,
  value
from your_table,
unnest([struct(extract_all_leaves(data) as json)]),
unnest(extract_keys(json)) key with offset
-- Pair each key with the value at the same array position.
inner join unnest(extract_values(json)) value with offset
using(offset)
如果应用于您问题中的示例数据 - 输出为
这种方法的好处是它非常通用,因此可以处理 JSON 中任意层级的嵌套。
下面的例子data/table
输出是
我有一个 table 看起来像这样
keyA | data:{"value":false}
keyB | data:{"value":3}
keyC | data:{"value":{"paid":10,"unpaid":20}}
对于 keyA、keyB,我可以很容易地用 JSON_EXTRACT_SCALAR 提取单个值;但是对于 keyC,我想返回多个值并更改键名,因此最终输出如下所示:
keyA | false
keyB | 3
keyC-paid | 10
keyC-unpaid | 20
我知道可以使用 UNNEST 和 JSON_EXTRACT 提取多个值并生成额外的行,但不确定如何将它们组合起来以调整键列的名称。
试试这个:
-- Expands each row's `$.value` into one output row per nested key.
-- Rows whose `$.value` is a scalar produce no key/value pairs, so the
-- fallbacks below emit the bare key and the scalar itself
-- (assumes the bqutil helpers yield nothing to unnest for scalars -- TODO confirm).
WITH sample AS (
  SELECT col, data
  FROM UNNEST([
    STRUCT('keyA' AS col, '{"value":false}' AS data),
    STRUCT('keyB', '{"value":3}'),
    STRUCT('keyC', '{"value":{"paid":10,"unpaid":20}}')
  ])
),
-- Key and value arrays extracted from the same `$.value` fragment.
parsed AS (
  SELECT
    col,
    data,
    `bqutil.fn.json_extract_keys`(JSON_QUERY(data, '$.value')) AS keys,
    `bqutil.fn.json_extract_values`(JSON_QUERY(data, '$.value')) AS vals
  FROM sample
)
SELECT
  -- Append "-<key>" only when a nested key exists.
  CONCAT(p.col, COALESCE(CONCAT('-', key_name), '')) AS col,
  -- Fall back to the scalar at `$.value` when there is no nested value.
  COALESCE(key_value, JSON_VALUE(p.data, '$.value')) AS data
FROM parsed AS p
LEFT JOIN UNNEST(p.keys) AS key_name WITH OFFSET AS key_pos
LEFT JOIN UNNEST(p.vals) AS key_value WITH OFFSET AS value_pos
  ON key_pos = value_pos;
更通用的方法
-- Returns the top-level keys of the JSON text `input`, in JavaScript
-- enumeration order.
-- A SQL NULL or a JSON `null` yields an empty array instead of aborting the
-- query with a TypeError. Malformed JSON still raises.
create temp function extract_keys(input string) returns array<string> language js as """
  const parsed = JSON.parse(input);
  // JSON.parse(null) evaluates to null, and Object.keys(null) throws.
  return parsed === null ? [] : Object.keys(parsed);
""";
-- Returns the top-level values of the JSON text `input`, in the same order
-- that extract_keys returns the keys.
-- A SQL NULL or a JSON `null` yields an empty array instead of aborting the
-- query with a TypeError. Malformed JSON still raises.
create temp function extract_values(input string) returns array<string> language js as """
  const parsed = JSON.parse(input);
  // JSON.parse(null) evaluates to null, and Object.values(null) throws.
  return parsed === null ? [] : Object.values(parsed);
""";
-- Flattens the JSON text `input` into a single-level JSON object (returned as
-- text) whose keys are the dot-joined paths to each leaf, e.g.
--   {"value":{"paid":10}}  ->  {"value.paid":10}
-- Array elements are keyed by their index. Leaves that are null, and empty
-- objects/arrays, contribute no entry to the result.
create temp function extract_all_leaves(input string) returns string language js as '''
  // Depth-first walk that writes every leaf into the shared accumulator `res`.
  function flattenObj(obj, parent = '', res = {}){
    for(let key in obj){
      let propName = parent ? parent + '.' + key : key;
      if(typeof obj[key] === 'object'){
        // Objects, arrays and null all report "object". Recursing into null
        // or an empty container iterates nothing, so no entry is added.
        flattenObj(obj[key], propName, res);
      } else {
        res[propName] = obj[key];
      }
    }
    return res;
  }
  // Serialize once, after the whole tree has been walked, rather than on
  // every recursive return.
  return JSON.stringify(flattenObj(JSON.parse(input)));
''';
-- Emits one row per flattened JSON leaf of `data`.
-- The output key is `col` followed by the leaf path, with the leading "value"
-- segment removed and the remaining dots turned into dashes
-- (e.g. keyC + "value.paid" -> "keyC-paid").
-- Only a leading "value" path segment is stripped, so nested keys that merely
-- contain the text "value" (e.g. "value.total_value") are kept intact.
select
  col || replace(regexp_replace(key, r'^value(\.|$)', r'\1'), '.', '-') as col,
  value
from your_table,
unnest([struct(extract_all_leaves(data) as json)]),
unnest(extract_keys(json)) key with offset
-- Pair each key with the value at the same array position.
inner join unnest(extract_values(json)) value with offset
using(offset)
如果应用于您问题中的示例数据 - 输出为
这种方法的好处是它非常通用,因此可以处理 JSON 中任意层级的嵌套。
下面的例子 data/table
输出是