将数组转换为 BigQuery 中的列
Transform Array into columns in BigQuery
我有一个 json 字符串存储在 BigQuery 的字符串列中。里面有一个数组。我想从数组中选择一些字段并将其值写入 BQ 列。
例如，假设 BQ 中存储了以下 JSON：
{
"pool": "mypool",
"statusCode": "0",
"payloads": [
{
"name": "request",
"fullpath": "com.gcp.commontools.edlpayload.EDLPayloadManagerTest$Request",
"jsonPayload": {
"body": "{\"data\":\"foo\"}"
},
"orientation": "REQUEST",
"httpTransport": {
"httpMethod": "POST",
"headers": {
"headers": {
"a": "1"
}
},
"sourceEndpoint": "/v1/foobar"
}
},
{
"name": "response",
"fullpath": "com.gcp.commontools.edlpayload.EDLPayloadManagerTest$Response",
"jsonPayload": {
"body": "{\"data\":\"bar\"}"
},
"orientation": "RESPONSE",
"httpTransport": {
"headers": {
"headers": {
"b": "2"
}
},
"httpResponseCode": 200
}
},
{
"name": "attributes",
"fullpath": "java.util.HashMap",
"nameValuePairs": {
"data": {
"one": "1"
}
},
"orientation": "TRANSITORY"
}
],
"uuid": "11EC-C714-8ADE2390-9619-1B80E63968CC",
"payloadName": "my-overall-name"
}
假设目标 BQ 表的架构（列）为：
pool、requestFullPath、requestPayload、responseFullPath、responsePayload
我想从上面的 JSON 中挑选几个元素，并将它们的值映射到 BQ 的列中。请注意，payloads 数组是动态的：其中可能只有 1 个 payload，也可能有多个，而且它们的顺序不固定。例如，request payload 可能出现在第 0 个位置，也可能出现在第 1 个位置，等等。
可以考虑以下查询：
-- Flatten the `payloads` JSON array into one row per payload, then pivot the
-- 'request' and 'response' payloads into their own columns.
-- Result columns: pool, FullPath_request, Payload_request,
--                 FullPath_response, Payload_response.
with payload_rows as (
  select
    json_value(t.json_col, '$.pool') as pool,
    json_value(payload, '$.name') as name,
    json_value(payload, '$.fullpath') as FullPath,
    json_value(payload, '$.jsonPayload.body') as Payload
  from your_table as t
  -- One output row per element of $.payloads; array order does not matter
  -- because each element is identified by its own "name" field.
  cross join unnest(json_extract_array(t.json_col, '$.payloads')) as payload
)
select *
from payload_rows
-- PIVOT groups by the remaining non-pivoted column (pool). Payload names other
-- than 'request' and 'response' (e.g. 'attributes') produce no columns.
pivot (
  any_value(FullPath) as FullPath,
  any_value(Payload) as Payload
  for name in ('request', 'response')
)
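如果应用于您问题中的示例数据，输出为一行：pool = mypool，FullPath_request = com.gcp.commontools.edlpayload.EDLPayloadManagerTest$Request，Payload_request = {"data":"foo"}，FullPath_response = com.gcp.commontools.edlpayload.EDLPayloadManagerTest$Response，Payload_response = {"data":"bar"}。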
我有一个 json 字符串存储在 BigQuery 的字符串列中。里面有一个数组。我想从数组中选择一些字段并将其值写入 BQ 列。
例如，假设 BQ 中存储了以下 JSON：
{
"pool": "mypool",
"statusCode": "0",
"payloads": [
{
"name": "request",
"fullpath": "com.gcp.commontools.edlpayload.EDLPayloadManagerTest$Request",
"jsonPayload": {
"body": "{\"data\":\"foo\"}"
},
"orientation": "REQUEST",
"httpTransport": {
"httpMethod": "POST",
"headers": {
"headers": {
"a": "1"
}
},
"sourceEndpoint": "/v1/foobar"
}
},
{
"name": "response",
"fullpath": "com.gcp.commontools.edlpayload.EDLPayloadManagerTest$Response",
"jsonPayload": {
"body": "{\"data\":\"bar\"}"
},
"orientation": "RESPONSE",
"httpTransport": {
"headers": {
"headers": {
"b": "2"
}
},
"httpResponseCode": 200
}
},
{
"name": "attributes",
"fullpath": "java.util.HashMap",
"nameValuePairs": {
"data": {
"one": "1"
}
},
"orientation": "TRANSITORY"
}
],
"uuid": "11EC-C714-8ADE2390-9619-1B80E63968CC",
"payloadName": "my-overall-name"
}
假设目标 BQ 表的架构（列）为：
pool、requestFullPath、requestPayload、responseFullPath、responsePayload
我想从上面的 JSON 中挑选几个元素，并将它们的值映射到 BQ 的列中。请注意，payloads 数组是动态的：其中可能只有 1 个 payload，也可能有多个，而且它们的顺序不固定。例如，request payload 可能出现在第 0 个位置，也可能出现在第 1 个位置，等等。
可以考虑以下查询：
-- Flatten the `payloads` JSON array into one row per payload, then pivot the
-- 'request' and 'response' payloads into their own columns.
-- Result columns: pool, FullPath_request, Payload_request,
--                 FullPath_response, Payload_response.
with payload_rows as (
  select
    json_value(t.json_col, '$.pool') as pool,
    json_value(payload, '$.name') as name,
    json_value(payload, '$.fullpath') as FullPath,
    json_value(payload, '$.jsonPayload.body') as Payload
  from your_table as t
  -- One output row per element of $.payloads; array order does not matter
  -- because each element is identified by its own "name" field.
  cross join unnest(json_extract_array(t.json_col, '$.payloads')) as payload
)
select *
from payload_rows
-- PIVOT groups by the remaining non-pivoted column (pool). Payload names other
-- than 'request' and 'response' (e.g. 'attributes') produce no columns.
pivot (
  any_value(FullPath) as FullPath,
  any_value(Payload) as Payload
  for name in ('request', 'response')
)
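如果应用于您问题中的示例数据，输出为一行：pool = mypool，FullPath_request = com.gcp.commontools.edlpayload.EDLPayloadManagerTest$Request，Payload_request = {"data":"foo"}，FullPath_response = com.gcp.commontools.edlpayload.EDLPayloadManagerTest$Response，Payload_response = {"data":"bar"}。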