取消嵌套存储在列中的 JSON 字符串 [BigQuery]
Unnesting JSON String stored in a column [BigQuery]
我有一个 table,其中一列包含原始 JSON 字符串,如下所示:
存储在 order_lines 列中的示例 JSON:
{
"STR_BLK_002":{
"amount":167,
"type":"part spare",
"total_discount":0,
"color":"Black",
"is_out_of_stock":false,
"variable_fields":{
"Size":"XL",
"trueColor":"Black"
},
"category_id":"44356721",
"status_list":[
{
"id":1,
"time":"2021-04-01T15:01:54.746Z",
"status":"ORDER PLACED"
},
{
"id":2,
"time":"2021-04-02T10:31:00.397Z",
"status":"PACKED"
},
{
"id":3,
"time":"2021-04-04T10:31:01.719Z",
"status":"SHIPPED"
},
{
"id":3,
"time":"2021-04-04T18:12:06.896Z",
"status":"SHIPPED"
}
],
"product_id":270,
"price_per_quantity":167,
"quantity":1,
"maximum_quantity":10,
"variant_name":"Helmet strap",
"current_status":30,
"estimated_delivery":"09 Apr 2021",
"total_before_discount":167,
"delivery_statuses":[
{
"time":"2021-04-01T15:10:13.594Z",
"status":"FULFILLABLE"
},
{
"time":"2021-04-02T10:31:00.397Z",
"status":"PACKED"
},
{
"time":"2021-04-03T10:31:01.197Z",
"status":"READY_TO_SHIP"
},
{
"time":"2021-04-04T10:31:01.719Z",
"status":"DISPATCHED"
},
{
"time":"2021-04-04T18:12:06.896Z",
"status":"SHIPPED"
}
],
"sku_code":"STR_BLK_002"
}
}
我想取消嵌套这个字符串,以便可以单独访问各个键值对。此外,sku_code(上面示例中的 'STR_BLK_002')在其他任何列中都不存在,并且该字符串可能包含多个 SKU;例如,如果一个订单对应 2 个 SKU,那么 JSON 字符串将是:
{
"STR_BLK_002":{
"amount":167,
"type":"part spare",
"total_discount":0,
"color":"Black",
"is_out_of_stock":false,
"variable_fields":{
"Size":"XL",
"trueColor":"Black"
},
"category_id":"44356721",
"status_list":[
{
"id":1,
"time":"2021-04-01T15:01:54.746Z",
"status":"ORDER PLACED"
},
{
"id":2,
"time":"2021-04-02T10:31:00.397Z",
"status":"PACKED"
},
{
"id":3,
"time":"2021-04-04T10:31:01.719Z",
"status":"SHIPPED"
},
{
"id":3,
"time":"2021-04-04T18:12:06.896Z",
"status":"SHIPPED"
}
],
"product_id":270,
"price_per_quantity":167,
"quantity":1,
"maximum_quantity":10,
"variant_name":"Helmet strap",
"current_status":3,
"estimated_delivery":"09 Apr 2021",
"total_before_discount":167,
"delivery_statuses":[
{
"time":"2021-04-01T15:10:13.594Z",
"status":"FULFILLABLE"
},
{
"time":"2021-04-02T10:31:00.397Z",
"status":"PACKED"
},
{
"time":"2021-04-03T10:31:01.197Z",
"status":"READY_TO_SHIP"
},
{
"time":"2021-04-04T10:31:01.719Z",
"status":"DISPATCHED"
},
{
"time":"2021-04-04T18:12:06.896Z",
"status":"SHIPPED"
}
],
"sku_code":"STR_BLK_002"
},
"STR_BLK_008":{
"amount":590,
"type":"accessory",
"total_discount":0,
"color":"blue",
"is_out_of_stock":false,
"variable_fields":{
"Size":"XL",
"trueColor":"prussian blue"
},
"category_id":"65577970",
"status_list":[
{
"id":1,
"time":"2021-04-06T15:01:54.746Z",
"status":"ORDER PLACED"
},
{
"id":2,
"time":"2021-04-07T10:31:00.397Z",
"status":"PACKED"
},
{
"id":3,
"time":"2021-04-07T10:31:01.719Z",
"status":"SHIPPED"
},
{
"id":3,
"time":"2021-04-08T18:12:06.896Z",
"status":"SHIPPED"
}
],
"product_id":276,
"price_per_quantity":590,
"quantity":1,
"maximum_quantity":5,
"variant_name":"Car Perfume",
"current_status":3,
"estimated_delivery":"09 Apr 2021",
"total_before_discount":590,
"delivery_statuses":[
{
"time":"2021-04-06T15:10:13.594Z",
"status":"FULFILLABLE"
},
{
"time":"2021-04-07T10:31:00.397Z",
"status":"PACKED"
},
{
"time":"2021-04-07T10:31:01.197Z",
"status":"READY_TO_SHIP"
},
{
"time":"2021-04-08T10:31:01.719Z",
"status":"DISPATCHED"
},
{
"time":"2021-04-10T18:12:06.896Z",
"status":"SHIPPED"
}
],
"sku_code":"STR_BLK_008"
}
}
我想将此信息分成单独的列,以便我可以为每个 SKU 获取相应的值。
下面的查询应该可以作为一个不错的起点:
-- Emits one row per SKU entry found in the order_lines JSON string and pulls
-- selected fields of that entry into their own columns (all returned as STRING).
--
-- How it works:
--   1. Every top-level key of the form "STR_BLK_<digits>" (with optional
--      whitespace around the colon) is rewritten to the fixed marker "STR_BLK":
--      so the string can be split on one constant delimiter.
--   2. Each piece after the first (offset > 0) is the body of one SKU object;
--      surrounding braces, commas and whitespace are trimmed and the piece is
--      re-wrapped in { } to form a standalone JSON object.
--
-- Whitespace is deliberately NOT stripped from the whole document: doing so
-- would also delete spaces inside string values ("part spare" -> "partspare").
--
-- Limitations: only keys matching STR_BLK_<digits> are recognised, and the
-- trim step assumes the last field of each SKU object is a scalar (here
-- sku_code), not a nested object or array.
select
  json_extract_scalar(line, '$.sku_code') as sku_code,
  json_extract_scalar(line, '$.amount') as amount,
  json_extract_scalar(line, '$.type') as type,
  json_extract_scalar(line, '$.total_discount') as total_discount,
  json_extract_scalar(line, '$.color') as color,
  json_extract_scalar(line, '$.variable_fields.Size') as Size,
  json_extract_scalar(line, '$.variable_fields.trueColor') as trueColor,
from `project.dataset.table`
cross join unnest(
  split(
    regexp_replace(order_lines, r'"STR_BLK_\d+"\s*:\s*\{', '"STR_BLK":{'),
    '"STR_BLK":'
  )
) as order_line with offset
-- Trim set: comma, space, tab, CR, LF and both braces.
cross join unnest([struct('{' || trim(order_line, ', \t\r\n{}') || '}' as line)])
-- offset 0 is the text before the first SKU key (just the opening brace).
where offset > 0
如果应用于您问题中的第一个示例 - 输出为
如果应用于您问题中的第二个示例 - 输出为
希望您可以将此示例扩展到您心中的任何最终目标
所以基本上我认为你想要做的是首先将你的列转换为一个结构数组,而不是这样:
{
"STR_BLK_002": {...},
"STR_BLK_003": {...}
}
而是得到这样的结构:
[
{
"amount":167,
"type":"part spare",
"total_discount":0,
...
},
{
"amount":590,
"type":"accessory",
"total_discount":0,
...
}
]
使用该格式的数据,您可以利用 UNNEST 将每个条目拆成单独的一行,然后使用 JSON 函数(例如 JSON_EXTRACT_SCALAR)将各字段提取到各自的列中。
为了做到这一点,我构建了一个 Javascript UDF 来查找对象中的键,然后遍历每个键以创建一个结构数组。
-- format_json(str): turns a JSON object string into an array of JSON strings,
-- one per top-level value, discarding the top-level keys.
--
--   str      JSON text whose top level is an object keyed by SKU code.
--   returns  ARRAY<STRING>; each element is the JSON serialisation of one
--            top-level value. Returns an empty array when str is NULL or when
--            the parsed document is not an object/array (e.g. "null", "42").
--
-- Malformed JSON still raises from JSON.parse and fails the query.
CREATE TEMP FUNCTION format_json(str STRING)
RETURNS ARRAY<STRING>
LANGUAGE js AS r"""
  // A SQL NULL input would otherwise parse to null and make Object.keys throw.
  if (str === null || str === undefined) {
    return [];
  }
  var parsed = JSON.parse(str);
  // JSON.parse may yield null or a primitive; there is nothing to unnest then.
  if (parsed === null || typeof parsed !== 'object') {
    return [];
  }
  return Object.keys(parsed).map(function (key) {
    return JSON.stringify(parsed[key]);
  });
""";
-- One output row per element returned by format_json(order_lines); each
-- selected field is extracted from that element as a STRING. LEFT JOIN keeps
-- source rows for which format_json returns no elements.
SELECT
  JSON_EXTRACT_SCALAR(sku_json, '$.amount') AS amount,
  JSON_EXTRACT_SCALAR(sku_json, '$.type') AS type,
  JSON_EXTRACT_SCALAR(sku_json, '$.total_discount') AS total_discount,
  JSON_EXTRACT_SCALAR(sku_json, '$.color') AS color,
  JSON_EXTRACT_SCALAR(sku_json, '$.is_out_of_stock') AS is_out_of_stock,
  JSON_EXTRACT_SCALAR(sku_json, '$.sku_code') AS sku_code
FROM testing.json_test AS src
LEFT JOIN UNNEST(format_json(src.order_lines)) AS sku_json
结果是:
我有一个 table,其中一列包含原始 JSON 字符串,如下所示:
存储在 order_lines 列中的示例 JSON:
{
"STR_BLK_002":{
"amount":167,
"type":"part spare",
"total_discount":0,
"color":"Black",
"is_out_of_stock":false,
"variable_fields":{
"Size":"XL",
"trueColor":"Black"
},
"category_id":"44356721",
"status_list":[
{
"id":1,
"time":"2021-04-01T15:01:54.746Z",
"status":"ORDER PLACED"
},
{
"id":2,
"time":"2021-04-02T10:31:00.397Z",
"status":"PACKED"
},
{
"id":3,
"time":"2021-04-04T10:31:01.719Z",
"status":"SHIPPED"
},
{
"id":3,
"time":"2021-04-04T18:12:06.896Z",
"status":"SHIPPED"
}
],
"product_id":270,
"price_per_quantity":167,
"quantity":1,
"maximum_quantity":10,
"variant_name":"Helmet strap",
"current_status":30,
"estimated_delivery":"09 Apr 2021",
"total_before_discount":167,
"delivery_statuses":[
{
"time":"2021-04-01T15:10:13.594Z",
"status":"FULFILLABLE"
},
{
"time":"2021-04-02T10:31:00.397Z",
"status":"PACKED"
},
{
"time":"2021-04-03T10:31:01.197Z",
"status":"READY_TO_SHIP"
},
{
"time":"2021-04-04T10:31:01.719Z",
"status":"DISPATCHED"
},
{
"time":"2021-04-04T18:12:06.896Z",
"status":"SHIPPED"
}
],
"sku_code":"STR_BLK_002"
}
}
我想取消嵌套这个字符串,以便可以单独访问各个键值对。此外,sku_code(上面示例中的 'STR_BLK_002')在其他任何列中都不存在,并且该字符串可能包含多个 SKU;例如,如果一个订单对应 2 个 SKU,那么 JSON 字符串将是:
{
"STR_BLK_002":{
"amount":167,
"type":"part spare",
"total_discount":0,
"color":"Black",
"is_out_of_stock":false,
"variable_fields":{
"Size":"XL",
"trueColor":"Black"
},
"category_id":"44356721",
"status_list":[
{
"id":1,
"time":"2021-04-01T15:01:54.746Z",
"status":"ORDER PLACED"
},
{
"id":2,
"time":"2021-04-02T10:31:00.397Z",
"status":"PACKED"
},
{
"id":3,
"time":"2021-04-04T10:31:01.719Z",
"status":"SHIPPED"
},
{
"id":3,
"time":"2021-04-04T18:12:06.896Z",
"status":"SHIPPED"
}
],
"product_id":270,
"price_per_quantity":167,
"quantity":1,
"maximum_quantity":10,
"variant_name":"Helmet strap",
"current_status":3,
"estimated_delivery":"09 Apr 2021",
"total_before_discount":167,
"delivery_statuses":[
{
"time":"2021-04-01T15:10:13.594Z",
"status":"FULFILLABLE"
},
{
"time":"2021-04-02T10:31:00.397Z",
"status":"PACKED"
},
{
"time":"2021-04-03T10:31:01.197Z",
"status":"READY_TO_SHIP"
},
{
"time":"2021-04-04T10:31:01.719Z",
"status":"DISPATCHED"
},
{
"time":"2021-04-04T18:12:06.896Z",
"status":"SHIPPED"
}
],
"sku_code":"STR_BLK_002"
},
"STR_BLK_008":{
"amount":590,
"type":"accessory",
"total_discount":0,
"color":"blue",
"is_out_of_stock":false,
"variable_fields":{
"Size":"XL",
"trueColor":"prussian blue"
},
"category_id":"65577970",
"status_list":[
{
"id":1,
"time":"2021-04-06T15:01:54.746Z",
"status":"ORDER PLACED"
},
{
"id":2,
"time":"2021-04-07T10:31:00.397Z",
"status":"PACKED"
},
{
"id":3,
"time":"2021-04-07T10:31:01.719Z",
"status":"SHIPPED"
},
{
"id":3,
"time":"2021-04-08T18:12:06.896Z",
"status":"SHIPPED"
}
],
"product_id":276,
"price_per_quantity":590,
"quantity":1,
"maximum_quantity":5,
"variant_name":"Car Perfume",
"current_status":3,
"estimated_delivery":"09 Apr 2021",
"total_before_discount":590,
"delivery_statuses":[
{
"time":"2021-04-06T15:10:13.594Z",
"status":"FULFILLABLE"
},
{
"time":"2021-04-07T10:31:00.397Z",
"status":"PACKED"
},
{
"time":"2021-04-07T10:31:01.197Z",
"status":"READY_TO_SHIP"
},
{
"time":"2021-04-08T10:31:01.719Z",
"status":"DISPATCHED"
},
{
"time":"2021-04-10T18:12:06.896Z",
"status":"SHIPPED"
}
],
"sku_code":"STR_BLK_008"
}
}
我想将此信息分成单独的列,以便我可以为每个 SKU 获取相应的值。
下面的查询应该可以作为一个不错的起点:
-- Emits one row per SKU entry found in the order_lines JSON string and pulls
-- selected fields of that entry into their own columns (all returned as STRING).
--
-- How it works:
--   1. Every top-level key of the form "STR_BLK_<digits>" (with optional
--      whitespace around the colon) is rewritten to the fixed marker "STR_BLK":
--      so the string can be split on one constant delimiter.
--   2. Each piece after the first (offset > 0) is the body of one SKU object;
--      surrounding braces, commas and whitespace are trimmed and the piece is
--      re-wrapped in { } to form a standalone JSON object.
--
-- Whitespace is deliberately NOT stripped from the whole document: doing so
-- would also delete spaces inside string values ("part spare" -> "partspare").
--
-- Limitations: only keys matching STR_BLK_<digits> are recognised, and the
-- trim step assumes the last field of each SKU object is a scalar (here
-- sku_code), not a nested object or array.
select
  json_extract_scalar(line, '$.sku_code') as sku_code,
  json_extract_scalar(line, '$.amount') as amount,
  json_extract_scalar(line, '$.type') as type,
  json_extract_scalar(line, '$.total_discount') as total_discount,
  json_extract_scalar(line, '$.color') as color,
  json_extract_scalar(line, '$.variable_fields.Size') as Size,
  json_extract_scalar(line, '$.variable_fields.trueColor') as trueColor,
from `project.dataset.table`
cross join unnest(
  split(
    regexp_replace(order_lines, r'"STR_BLK_\d+"\s*:\s*\{', '"STR_BLK":{'),
    '"STR_BLK":'
  )
) as order_line with offset
-- Trim set: comma, space, tab, CR, LF and both braces.
cross join unnest([struct('{' || trim(order_line, ', \t\r\n{}') || '}' as line)])
-- offset 0 is the text before the first SKU key (just the opening brace).
where offset > 0
如果应用于您问题中的第一个示例 - 输出为
如果应用于您问题中的第二个示例 - 输出为
希望您可以将此示例扩展到您心中的任何最终目标
所以基本上我认为你想要做的是首先将你的列转换为一个结构数组,而不是这样:
{
"STR_BLK_002": {...},
"STR_BLK_003": {...}
}
而是得到这样的结构:
[
{
"amount":167,
"type":"part spare",
"total_discount":0,
...
},
{
"amount":590,
"type":"accessory",
"total_discount":0,
...
}
]
使用该格式的数据,您可以利用 UNNEST 将每个条目拆成单独的一行,然后使用 JSON 函数(例如 JSON_EXTRACT_SCALAR)将各字段提取到各自的列中。
为了做到这一点,我构建了一个 Javascript UDF 来查找对象中的键,然后遍历每个键以创建一个结构数组。
-- format_json(str): turns a JSON object string into an array of JSON strings,
-- one per top-level value, discarding the top-level keys.
--
--   str      JSON text whose top level is an object keyed by SKU code.
--   returns  ARRAY<STRING>; each element is the JSON serialisation of one
--            top-level value. Returns an empty array when str is NULL or when
--            the parsed document is not an object/array (e.g. "null", "42").
--
-- Malformed JSON still raises from JSON.parse and fails the query.
CREATE TEMP FUNCTION format_json(str STRING)
RETURNS ARRAY<STRING>
LANGUAGE js AS r"""
  // A SQL NULL input would otherwise parse to null and make Object.keys throw.
  if (str === null || str === undefined) {
    return [];
  }
  var parsed = JSON.parse(str);
  // JSON.parse may yield null or a primitive; there is nothing to unnest then.
  if (parsed === null || typeof parsed !== 'object') {
    return [];
  }
  return Object.keys(parsed).map(function (key) {
    return JSON.stringify(parsed[key]);
  });
""";
-- One output row per element returned by format_json(order_lines); each
-- selected field is extracted from that element as a STRING. LEFT JOIN keeps
-- source rows for which format_json returns no elements.
SELECT
  JSON_EXTRACT_SCALAR(sku_json, '$.amount') AS amount,
  JSON_EXTRACT_SCALAR(sku_json, '$.type') AS type,
  JSON_EXTRACT_SCALAR(sku_json, '$.total_discount') AS total_discount,
  JSON_EXTRACT_SCALAR(sku_json, '$.color') AS color,
  JSON_EXTRACT_SCALAR(sku_json, '$.is_out_of_stock') AS is_out_of_stock,
  JSON_EXTRACT_SCALAR(sku_json, '$.sku_code') AS sku_code
FROM testing.json_test AS src
LEFT JOIN UNNEST(format_json(src.order_lines)) AS sku_json
结果是: