SQL:如何根据其值旋转行
SQL: how to pivot rows based on its value
我要转换这个数据:
id value
---------
1 v1=10
1 v2=20
1 v3=30
2 v1=40
看起来像这样:
id v1 v2 v3
---------------------------
1 10 20 30
2 40 null null
有什么解决办法吗?我想解决它 bigquery 的标准 sql.
SELECT
id,
MAX(IF(name = 'v1', value, NULL)) AS v1,
MAX(IF(name = 'v2', value, NULL)) AS v2,
MAX(IF(name = 'v3', value, NULL)) AS v3
FROM (
SELECT
SPLIT(value, '=')[OFFSET(0)] AS name,
SPLIT(value, '=')[OFFSET(1)] AS value
FROM dataset.table
)
GROUP BY id
以下几个选项 - 全部用于 BigQuery Standrad SQL
Option 1 - hardcoded pivoting
假设您事先知道要创建的列的数量和名称
在这种情况下,下面就是您需要的
#standardSQL
SELECT
id,
MAX(IF(key = 'v1', val, NULL)) v1,
MAX(IF(key = 'v2', val, NULL)) v2,
MAX(IF(key = 'v3', val, NULL)) v3
FROM `project.dataset.table`,
UNNEST([STRUCT<key STRING, val STRING>(SPLIT(value,"=")[OFFSET(0)], SPLIT(value, "=")[OFFSET(1)])])
GROUP BY id
您可以使用您问题中的虚拟数据进行测试,如下所示
#standardSQL
WITH `project.dataset.table` AS (
SELECT 1 id, 'v1=10' value UNION ALL
SELECT 1, 'v2=20' UNION ALL
SELECT 1, 'v3=30' UNION ALL
SELECT 2, 'v1=40'
)
SELECT
id,
MAX(IF(key = 'v1', val, NULL)) v1,
MAX(IF(key = 'v2', val, NULL)) v2,
MAX(IF(key = 'v3', val, NULL)) v3
FROM `project.dataset.table`,
UNNEST([STRUCT<key STRING, val STRING>(SPLIT(value,"=")[OFFSET(0)], SPLIT(value, "=")[OFFSET(1)])])
GROUP BY id
-- ORDER BY id
结果符合预期:
Row id v1 v2 v3
1 1 10 20 30
2 2 40 null null
Option 2 - dynamic pivoting
如果您不知道列的数量和名称 - 您首先需要生成类似于上述选项 #1 中的脚本
你应该 运行 下面得到它
#standardSQL
SELECT CONCAT('SELECT id, ',
STRING_AGG(
CONCAT('MAX(IF(key = "', key, '", val, NULL)) as ', key)
)
,' FROM `project.dataset.table`, UNNEST([STRUCT<key STRING, val STRING>(SPLIT(value, "=")[OFFSET(0)], SPLIT(value, "=")[OFFSET(1)])]) GROUP BY id')
FROM (
SELECT SPLIT(value, '=')[OFFSET(0)] key
FROM `project.dataset.table`
GROUP BY key
)
例如,如果你将 运行 它对抗同一个假人
#standardSQL
WITH `project.dataset.table` AS (
SELECT 1 id, 'v1=10' value UNION ALL
SELECT 1, 'v2=20' UNION ALL
SELECT 1, 'v3=30' UNION ALL
SELECT 2, 'v1=40'
)
SELECT CONCAT('SELECT id, ',
STRING_AGG(
CONCAT('MAX(IF(key = "', key, '", val, NULL)) as ', key)
)
,' FROM `project.dataset.table`, UNNEST([STRUCT<key STRING, val STRING>(SPLIT(value, "=")[OFFSET(0)], SPLIT(value, "=")[OFFSET(1)])]) GROUP BY id')
FROM (
SELECT SPLIT(value, '=')[OFFSET(0)] key
FROM `project.dataset.table`
GROUP BY key
)
您将得到以下字符串
SELECT id, MAX(IF(key = "v1", val, NULL)) AS v1,MAX(IF(key = "v2", val, NULL)) AS v2,MAX(IF(key = "v3", val, NULL)) AS v3 FROM `project.dataset.table`, UNNEST([STRUCT<key STRING, val STRING>(SPLIT(value, "=")[OFFSET(0)], SPLIT(value, "=")[OFFSET(1)])]) GROUP BY id
那么,现在 - 如果你 运行 这个脚本针对你的虚拟数据
#standardSQL
WITH `project.dataset.table` AS (
SELECT 1 id, 'v1=10' value UNION ALL
SELECT 1, 'v2=20' UNION ALL
SELECT 1, 'v3=30' UNION ALL
SELECT 2, 'v1=40'
)
SELECT id, MAX(IF(key = "v1", val, NULL)) AS v1,MAX(IF(key = "v2", val, NULL)) AS v2,MAX(IF(key = "v3", val, NULL)) AS v3 FROM `project.dataset.table`, UNNEST([STRUCT<key STRING, val STRING>(SPLIT(value, "=")[OFFSET(0)], SPLIT(value, "=")[OFFSET(1)])]) GROUP BY id
您将获得与选项 1 相同的结果 - 但不是 - 现在最终查询是为您动态生成的
Option 3 - flatten key - value vs. pivot
旋转很酷,但对于许多实际情况,下面的简单方法被发现非常有用,更适合处理
#standardSQL
WITH `project.dataset.table` AS (
SELECT 1 id, 'v1=10' value UNION ALL
SELECT 1, 'v2=20' UNION ALL
SELECT 1, 'v3=30' UNION ALL
SELECT 2, 'v1=40'
)
SELECT
id,
SPLIT(value,"=")[OFFSET(0)] key,
SPLIT(value, "=")[OFFSET(1)] val
FROM `project.dataset.table`
这给出了普通键值扁平化结构
Row id key val
1 1 v1 10
2 1 v2 20
3 1 v3 30
4 2 v1 40
我要转换这个数据:
id value
---------
1 v1=10
1 v2=20
1 v3=30
2 v1=40
看起来像这样:
id v1 v2 v3
---------------------------
1 10 20 30
2 40 null null
有什么解决办法吗?我想解决它 bigquery 的标准 sql.
SELECT
id,
MAX(IF(name = 'v1', value, NULL)) AS v1,
MAX(IF(name = 'v2', value, NULL)) AS v2,
MAX(IF(name = 'v3', value, NULL)) AS v3
FROM (
SELECT
SPLIT(value, '=')[OFFSET(0)] AS name,
SPLIT(value, '=')[OFFSET(1)] AS value
FROM dataset.table
)
GROUP BY id
以下几个选项 - 全部用于 BigQuery Standrad SQL
Option 1 - hardcoded pivoting
假设您事先知道要创建的列的数量和名称
在这种情况下,下面就是您需要的
#standardSQL
SELECT
id,
MAX(IF(key = 'v1', val, NULL)) v1,
MAX(IF(key = 'v2', val, NULL)) v2,
MAX(IF(key = 'v3', val, NULL)) v3
FROM `project.dataset.table`,
UNNEST([STRUCT<key STRING, val STRING>(SPLIT(value,"=")[OFFSET(0)], SPLIT(value, "=")[OFFSET(1)])])
GROUP BY id
您可以使用您问题中的虚拟数据进行测试,如下所示
#standardSQL
WITH `project.dataset.table` AS (
SELECT 1 id, 'v1=10' value UNION ALL
SELECT 1, 'v2=20' UNION ALL
SELECT 1, 'v3=30' UNION ALL
SELECT 2, 'v1=40'
)
SELECT
id,
MAX(IF(key = 'v1', val, NULL)) v1,
MAX(IF(key = 'v2', val, NULL)) v2,
MAX(IF(key = 'v3', val, NULL)) v3
FROM `project.dataset.table`,
UNNEST([STRUCT<key STRING, val STRING>(SPLIT(value,"=")[OFFSET(0)], SPLIT(value, "=")[OFFSET(1)])])
GROUP BY id
-- ORDER BY id
结果符合预期:
Row id v1 v2 v3
1 1 10 20 30
2 2 40 null null
Option 2 - dynamic pivoting
如果您不知道列的数量和名称 - 您首先需要生成类似于上述选项 #1 中的脚本
你应该 运行 下面得到它
#standardSQL
SELECT CONCAT('SELECT id, ',
STRING_AGG(
CONCAT('MAX(IF(key = "', key, '", val, NULL)) as ', key)
)
,' FROM `project.dataset.table`, UNNEST([STRUCT<key STRING, val STRING>(SPLIT(value, "=")[OFFSET(0)], SPLIT(value, "=")[OFFSET(1)])]) GROUP BY id')
FROM (
SELECT SPLIT(value, '=')[OFFSET(0)] key
FROM `project.dataset.table`
GROUP BY key
)
例如,如果你将 运行 它对抗同一个假人
#standardSQL
WITH `project.dataset.table` AS (
SELECT 1 id, 'v1=10' value UNION ALL
SELECT 1, 'v2=20' UNION ALL
SELECT 1, 'v3=30' UNION ALL
SELECT 2, 'v1=40'
)
SELECT CONCAT('SELECT id, ',
STRING_AGG(
CONCAT('MAX(IF(key = "', key, '", val, NULL)) as ', key)
)
,' FROM `project.dataset.table`, UNNEST([STRUCT<key STRING, val STRING>(SPLIT(value, "=")[OFFSET(0)], SPLIT(value, "=")[OFFSET(1)])]) GROUP BY id')
FROM (
SELECT SPLIT(value, '=')[OFFSET(0)] key
FROM `project.dataset.table`
GROUP BY key
)
您将得到以下字符串
SELECT id, MAX(IF(key = "v1", val, NULL)) AS v1,MAX(IF(key = "v2", val, NULL)) AS v2,MAX(IF(key = "v3", val, NULL)) AS v3 FROM `project.dataset.table`, UNNEST([STRUCT<key STRING, val STRING>(SPLIT(value, "=")[OFFSET(0)], SPLIT(value, "=")[OFFSET(1)])]) GROUP BY id
那么,现在 - 如果你 运行 这个脚本针对你的虚拟数据
#standardSQL
WITH `project.dataset.table` AS (
SELECT 1 id, 'v1=10' value UNION ALL
SELECT 1, 'v2=20' UNION ALL
SELECT 1, 'v3=30' UNION ALL
SELECT 2, 'v1=40'
)
SELECT id, MAX(IF(key = "v1", val, NULL)) AS v1,MAX(IF(key = "v2", val, NULL)) AS v2,MAX(IF(key = "v3", val, NULL)) AS v3 FROM `project.dataset.table`, UNNEST([STRUCT<key STRING, val STRING>(SPLIT(value, "=")[OFFSET(0)], SPLIT(value, "=")[OFFSET(1)])]) GROUP BY id
您将获得与选项 1 相同的结果 - 但不是 - 现在最终查询是为您动态生成的
Option 3 - flatten key - value vs. pivot
旋转很酷,但对于许多实际情况,下面的简单方法被发现非常有用,更适合处理
#standardSQL
WITH `project.dataset.table` AS (
SELECT 1 id, 'v1=10' value UNION ALL
SELECT 1, 'v2=20' UNION ALL
SELECT 1, 'v3=30' UNION ALL
SELECT 2, 'v1=40'
)
SELECT
id,
SPLIT(value,"=")[OFFSET(0)] key,
SPLIT(value, "=")[OFFSET(1)] val
FROM `project.dataset.table`
这给出了普通键值扁平化结构
Row id key val
1 1 v1 10
2 1 v2 20
3 1 v3 30
4 2 v1 40