BigQuery - 为多个组计算多个列的 0 - 100 个百分位数
BigQuery - Compute 0 - 100 percentiles for multiple columns, over multiple groups
我们有一个大查询 table 如下所示:
with
my_data as (
select 1 as num1, 32 as num2, 43 as num3, 'a' as letter union all
select 2 as num1, 21 as num2, 45 as num3, 'a' as letter union all
select 3 as num1, 99 as num2, 47 as num3, 'a' as letter union all
select 4 as num1, 83 as num2, 48 as num3, 'a' as letter union all
select 5 as num1, 55 as num2, 49 as num3, 'a' as letter union all
select 6 as num1, 35 as num2, 51 as num3, 'b' as letter union all
select 7 as num1, 94 as num2, 52 as num3, 'b' as letter union all
select 8 as num1, 17 as num2, 55 as num3, 'b' as letter union all
select 9 as num1, 33 as num2, 56 as num3, 'b' as letter union all
select 10 as num1, 81 as num2, 37 as num3, 'b' as letter union all
select 11 as num1, 42 as num2, 38 as num3, 'a' as letter union all
select 12 as num1, 26 as num2, 39 as num3, 'a' as letter union all
select 13 as num1, 92 as num2, 41 as num3, 'a' as letter union all
select 14 as num1, 38 as num2, 43 as num3, 'a' as letter union all
select 15 as num1, 31 as num2, 46 as num3, 'a' as letter union all
select 16 as num1, 53 as num2, 48 as num3, 'b' as letter union all
select 17 as num1, 49 as num2, 49 as num3, 'b' as letter union all
select 18 as num1, 71 as num2, 51 as num3, 'b' as letter union all
select 19 as num1, 67 as num2, 52 as num3, 'b' as letter union all
select 20 as num1, 62 as num2, 54 as num3, 'b' as letter
)
letter
是分组依据的列,num1, num2, num3
是我们要为其计算 0 - 100 %iles 的 3 列。更清楚地说,我们想要 return 一个具有 202 行和列 letter pctile value1 value2 value3
的 table。 letter
是a
(101次)和b
(101次),pctile
从0,1,2,3... 100,0,1,2,3... 100
走,value1 value2 value3
是对应的值第 0、1、2、3、4 等百分位数(对于每个组/字母)。
我之前在此处 post 编辑了这个非常相似的问题 - - 其中提供了一个有用的解决方案。但是,此解决方案适用于仅针对单个列计算 0 - 100 %ile 行的基本情况。现在,在我们数据的真实示例中,我们正在处理多列。之前 post 的解决方案在扩展到我们的 3 列新数据时不起作用。
SELECT letter, pctile, value1, value2, value3
FROM (
SELECT
letter,
APPROX_QUANTILES(num1, 100) AS value1,
APPROX_QUANTILES(num2, 100) AS value2,
APPROX_QUANTILES(num3, 100) AS value3,
FROM my_data
GROUP BY letter
) as t,
t.value1 WITH OFFSET AS pctile
这 技术上 return 202 行,但是 value2
和 value3
的每一行中的值不是单独的值,而是而似乎是整个长度 == 100 的数组。我尝试了不同的方法来获得所需的结果(202 行,每行都有正确的 value1 value2 value3
值),但没有成功。这可能吗?
试试下面
SELECT letter, pctile, value1, value2, value3
FROM (
SELECT
letter,
APPROX_QUANTILES(num1, 100) AS value1,
APPROX_QUANTILES(num2, 100) AS value2,
APPROX_QUANTILES(num3, 100) AS value3,
FROM my_data
GROUP BY letter
) as t
,t.value1 WITH OFFSET AS pctile
,t.value2 WITH OFFSET AS pctile2
,t.value3 WITH OFFSET AS pctile3
WHERE pctile = pctile2
AND pctile = pctile3
我们有一个大查询 table 如下所示:
with
my_data as (
select 1 as num1, 32 as num2, 43 as num3, 'a' as letter union all
select 2 as num1, 21 as num2, 45 as num3, 'a' as letter union all
select 3 as num1, 99 as num2, 47 as num3, 'a' as letter union all
select 4 as num1, 83 as num2, 48 as num3, 'a' as letter union all
select 5 as num1, 55 as num2, 49 as num3, 'a' as letter union all
select 6 as num1, 35 as num2, 51 as num3, 'b' as letter union all
select 7 as num1, 94 as num2, 52 as num3, 'b' as letter union all
select 8 as num1, 17 as num2, 55 as num3, 'b' as letter union all
select 9 as num1, 33 as num2, 56 as num3, 'b' as letter union all
select 10 as num1, 81 as num2, 37 as num3, 'b' as letter union all
select 11 as num1, 42 as num2, 38 as num3, 'a' as letter union all
select 12 as num1, 26 as num2, 39 as num3, 'a' as letter union all
select 13 as num1, 92 as num2, 41 as num3, 'a' as letter union all
select 14 as num1, 38 as num2, 43 as num3, 'a' as letter union all
select 15 as num1, 31 as num2, 46 as num3, 'a' as letter union all
select 16 as num1, 53 as num2, 48 as num3, 'b' as letter union all
select 17 as num1, 49 as num2, 49 as num3, 'b' as letter union all
select 18 as num1, 71 as num2, 51 as num3, 'b' as letter union all
select 19 as num1, 67 as num2, 52 as num3, 'b' as letter union all
select 20 as num1, 62 as num2, 54 as num3, 'b' as letter
)
letter
是分组依据的列,num1, num2, num3
是我们要为其计算 0 - 100 %iles 的 3 列。更清楚地说,我们想要 return 一个具有 202 行和列 letter pctile value1 value2 value3
的 table。 letter
是a
(101次)和b
(101次),pctile
从0,1,2,3... 100,0,1,2,3... 100
走,value1 value2 value3
是对应的值第 0、1、2、3、4 等百分位数(对于每个组/字母)。
我之前在此处 post 编辑了这个非常相似的问题 -
SELECT letter, pctile, value1, value2, value3
FROM (
SELECT
letter,
APPROX_QUANTILES(num1, 100) AS value1,
APPROX_QUANTILES(num2, 100) AS value2,
APPROX_QUANTILES(num3, 100) AS value3,
FROM my_data
GROUP BY letter
) as t,
t.value1 WITH OFFSET AS pctile
这 技术上 return 202 行,但是 value2
和 value3
的每一行中的值不是单独的值,而是而似乎是整个长度 == 100 的数组。我尝试了不同的方法来获得所需的结果(202 行,每行都有正确的 value1 value2 value3
值),但没有成功。这可能吗?
试试下面
SELECT letter, pctile, value1, value2, value3
FROM (
SELECT
letter,
APPROX_QUANTILES(num1, 100) AS value1,
APPROX_QUANTILES(num2, 100) AS value2,
APPROX_QUANTILES(num3, 100) AS value3,
FROM my_data
GROUP BY letter
) as t
,t.value1 WITH OFFSET AS pctile
,t.value2 WITH OFFSET AS pctile2
,t.value3 WITH OFFSET AS pctile3
WHERE pctile = pctile2
AND pctile = pctile3