将 bigquery json 字符串转换为列

convert bigquery json string to columns

对于以字符串形式存储的 JSON 数据,我希望有类似 JSON_EXTRACT_SCALAR 的功能,但能够支持数量不固定的结果列。

这是示例数据 - 不同的行可以有不同的列名,并且 json 可以嵌套:

-- Sample input: one JSON document per row, stored as a string.
-- Rows may carry different keys, and a key may hold nested JSON.
WITH `my_table` AS (
  SELECT
    '{"sku_types":"{\"id\":\"5433306\",\"product_code\":\"adfklj_ewkj\"}","additional_info":"Face 30 ml","stock_level":"20+"}' AS json_string
  UNION ALL
  SELECT
    '{"additional_info":"Face 100 ml","offer_info":"30%"}' AS json_string
)
SELECT json_string
FROM my_table;

我想将此数据提取到单独的列中:sku_types.id, sku_types.product_code, additional_info, stock_level, offer_info

这可以只用 SQL 完成,还是必须借助 JavaScript?

我事先不知道 json 字段的名称,所以我无法使用 JSON_EXTRACT_SCALAR 或 JSON_EXTRACT。

以下是 BigQuery 标准 SQL 的示例

#standardSQL
-- parseJson: flattens a JSON document into an array of 'path:value' strings.
--   y        JSON text holding an object; nested objects are walked recursively.
--   returns  one 'key:value' string per leaf, with the keys of nested objects
--            joined by '.' (for example 'sku_types.id:5433306').
-- A JSON null value is not handled by this version: calling toString() on it
-- throws inside the UDF.
CREATE TEMPORARY FUNCTION parseJson(y STRING)
RETURNS ARRAY<STRING>
LANGUAGE js AS """
  var z = [];
  function processKey(node, parent) {
    Object.keys(node).forEach(function(key) {
      var value = node[key].toString();
      // Build the full path in a local variable. Appending to `parent` here
      // would leak into the remaining sibling keys of this object and make
      // scalar keys that follow a nested object lose their prefix.
      var path = (parent === '') ? key : parent + '.' + key;
      if (value !== '[object Object]') {
        z.push(path + ':' + value);
      } else {
        // Plain objects stringify to '[object Object]': descend into them.
        processKey(node[key], path);
      }
    });
  }
  processKey(JSON.parse(y), '');
  return z;
""";
-- Flattens each row's JSON document into an array of (key, value) structs.
-- parseJson returns 'key:value' strings; the first ':' is the separator.
WITH `my_table` AS (
  SELECT
    1 AS id,
    '{"sku_types":{"id":"5433306","product_code":"adfklj_ewkj"},"additional_info":"Face 30 ml","stock_level":"20+"}' AS json_string
  UNION ALL
  SELECT
    2,
    '{"additional_info":"Face 100 ml","offer_info":"30%"}' AS json_string
)
SELECT
  id,
  ARRAY(
    SELECT AS STRUCT
      SPLIT(kv, ':')[OFFSET(0)] AS key,
      -- Keep everything after the first ':' so that a value which itself
      -- contains ':' (a time such as 10:30, a URL, ...) is not cut short.
      -- Stays NULL when no separator is present at all.
      IF(STRPOS(kv, ':') > 0, SUBSTR(kv, STRPOS(kv, ':') + 1), NULL) AS value
    FROM UNNEST(parseJson(json_string)) AS kv
  ) AS params
FROM my_table;

结果

Row id  params.key              params.value     
1   1   sku_types.id            5433306  
        sku_types.product_code  adfklj_ewkj  
        additional_info         Face 30 ml   
        stock_level             20+  
2   2   additional_info         Face 100 ml  
        offer_info              30%     

如您所见,上面的方法并没有把所有可能的属性解析到单独的列中(这在这里无法做到,除非您事先知道这些属性),而是将它们扁平化为 params 数组中的 key:value 对。

注意:在上面的示例中,我使用 : 来拼接 key:value 对,然后再按它拆分。如果键或值中可能包含该字符,您可以调整代码,改用更独特的分隔符来代替 :,例如 :::::::

Quick update to address comment:
... problem is that some of json values are null, in which case it throws an error

#standardSQL
-- parseJson: flattens a JSON document into an array of 'path:value' strings.
--   y        JSON text holding an object; nested objects are walked recursively.
--   returns  one 'key:value' string per leaf, with the keys of nested objects
--            joined by '.' (for example 'sku_types.id:5433306').
-- JSON null values are emitted with the placeholder value 'n/a'.
CREATE TEMPORARY FUNCTION parseJson(y STRING)
RETURNS ARRAY<STRING>
LANGUAGE js AS """
  var z = [];
  function processKey(node, parent) {
    Object.keys(node).forEach(function(key) {
      var raw = node[key];
      // Only a missing value becomes 'n/a'. A plain falsy test would also
      // replace the legitimate values 0, false and the empty string.
      var value = (raw === null || raw === undefined) ? 'n/a' : raw.toString();
      // Build the full path in a local variable. Appending to `parent` here
      // would leak into the remaining sibling keys of this object and make
      // scalar keys that follow a nested object lose their prefix.
      var path = (parent === '') ? key : parent + '.' + key;
      if (value !== '[object Object]') {
        z.push(path + ':' + value);
      } else {
        // Plain objects stringify to '[object Object]': descend into them.
        processKey(raw, path);
      }
    });
  }
  processKey(JSON.parse(y), '');
  return z;
""";
-- Flattens each row's JSON document into an array of (key, value) structs.
-- parseJson returns 'key:value' strings; the first ':' is the separator.
-- Row 3 exercises a JSON null value.
WITH `my_table` AS (
  SELECT
    1 AS id,
    '{"sku_types":{"id":"5433306","product_code":"adfklj_ewkj"},"additional_info":"Face 30 ml","stock_level":"20+"}' AS json_string
  UNION ALL
  SELECT
    2,
    '{"additional_info":"Face 100 ml","offer_info":"30%"}' AS json_string
  UNION ALL
  SELECT
    3 AS id,
    '{"offer_info":"30%", "price":null}' AS json_string
)
SELECT
  id,
  ARRAY(
    SELECT AS STRUCT
      SPLIT(kv, ':')[OFFSET(0)] AS key,
      -- Keep everything after the first ':' so that a value which itself
      -- contains ':' (a time such as 10:30, a URL, ...) is not cut short.
      -- Stays NULL when no separator is present at all.
      IF(STRPOS(kv, ':') > 0, SUBSTR(kv, STRPOS(kv, ':') + 1), NULL) AS value
    FROM UNNEST(parseJson(json_string)) AS kv
  ) AS params
FROM my_table;

结果

Row id  params.key              params.value     
1   1   sku_types.id            5433306  
        sku_types.product_code  adfklj_ewkj  
        additional_info         Face 30 ml   
        stock_level             20+  
2   2   additional_info         Face 100 ml  
        offer_info              30%  
3   3   offer_info              30%  
        price                   n/a    

如您所见 - 我正在用 'n/a' 替换空值,但您可以应用任何您想要的逻辑