在 Teradata 中一次获得多个百分位数(percentile_cont 等价物)
Obtaining multiple percentiles (percentile_cont equivalent) in one pass within Teradata
我知道我们可以在 Teradata 中将 percentile_cont
重写为:
SELECT
part_col
,data_col
+ ((MIN(data_col) OVER (PARTITION BY part_col ORDER BY data_col ROWS BETWEEN 1 FOLLOWING AND 1 FOLLOWING) - data_col)
* (((COUNT(*) OVER (PARTITION BY part_col) - 1) * x) MOD 1)) AS percentile_cont
FROM tab
QUALIFY ROW_NUMBER() OVER (PARTITION BY part_col ORDER BY data_col)
= CAST((COUNT(*) OVER (PARTITION BY part_col) - 1) * x AS INT) + 1;
有关详细信息,请参阅 this very helpful discussion。
了解将 x
替换为 0.90
会 return 第 90 个百分位数,是否有一种优雅的方式来扩展它并 return 一次通过多个百分位数?
例如,假设我想扩展此示例并 return 一次通过第 25、50 和 75 个百分位数?这可能吗?好像我需要多个 QUALIFY
语句?同样,如果我想要多个 GROUP BY
等价物,这是否类似于在 PARTITION BY
?
中传递更多列
-- SQL:2008 Equivalent pseudo-code
SELECT
part_col_a
,part_col_b
,PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY order_col) AS p25
,PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY order_col) AS p50
,PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY order_col) AS p75
FROM tab
GROUP BY
part_col_a
,part_col_b
你应该完整地阅读我的博客,最终的查询正是你想要的:-)
SELECT part_col
,MIN(pc25) OVER (PARTITION BY part_col) AS quartile_1
,MIN(pc50) OVER (PARTITION BY part_col) AS quartile_2
,MIN(pc75) OVER (PARTITION BY part_col) AS quartile_3
FROM
(
SELECT
part_col
,COUNT(*) OVER (PARTITION BY part_col) - 1 AS N
,ROW_NUMBER() OVER (PARTITION BY part_col ORDER BY data_col) - 1 AS rowno
,MIN(data_col) OVER (PARTITION BY part_col ORDER BY data_col ROWS BETWEEN 1 FOLLOWING AND 1 FOLLOWING) - data_col AS diff
,CASE
WHEN rowno = CAST(N * 0.25 AS INT)
THEN data_col +(((N * 0.25) MOD 1) * diff)
END AS pc25
,CASE
WHEN rowno = CAST(N * 0.50 AS INT)
THEN data_col +(((N * 0.50) MOD 1) * diff)
END AS pc50
,CASE
WHEN rowno = CAST(N * 0.75 AS INT)
THEN data_col +(((N * 0.75) MOD 1) * diff)
END AS pc75
FROM tab
QUALIFY rowno = CAST(N * 0.25 AS INT)
OR rowno = CAST(N * 0.50 AS INT)
OR rowno = CAST(N * 0.75 AS INT)
) AS dt
QUALIFY ROW_NUMBER() OVER (PARTITION BY part_col ORDER BY part_col) = 1
我知道我们可以在 Teradata 中将 percentile_cont
重写为:
SELECT
part_col
,data_col
+ ((MIN(data_col) OVER (PARTITION BY part_col ORDER BY data_col ROWS BETWEEN 1 FOLLOWING AND 1 FOLLOWING) - data_col)
* (((COUNT(*) OVER (PARTITION BY part_col) - 1) * x) MOD 1)) AS percentile_cont
FROM tab
QUALIFY ROW_NUMBER() OVER (PARTITION BY part_col ORDER BY data_col)
= CAST((COUNT(*) OVER (PARTITION BY part_col) - 1) * x AS INT) + 1;
有关详细信息,请参阅 this very helpful discussion。
了解将 x
替换为 0.90
会 return 第 90 个百分位数,是否有一种优雅的方式来扩展它并 return 一次通过多个百分位数?
例如,假设我想扩展此示例并 return 一次通过第 25、50 和 75 个百分位数?这可能吗?好像我需要多个 QUALIFY
语句?同样,如果我想要多个 GROUP BY
等价物,这是否类似于在 PARTITION BY
?
-- SQL:2008 Equivalent pseudo-code
SELECT
part_col_a
,part_col_b
,PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY order_col) AS p25
,PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY order_col) AS p50
,PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY order_col) AS p75
FROM tab
GROUP BY
part_col_a
,part_col_b
你应该完整地阅读我的博客,最终的查询正是你想要的:-)
SELECT part_col
,MIN(pc25) OVER (PARTITION BY part_col) AS quartile_1
,MIN(pc50) OVER (PARTITION BY part_col) AS quartile_2
,MIN(pc75) OVER (PARTITION BY part_col) AS quartile_3
FROM
(
SELECT
part_col
,COUNT(*) OVER (PARTITION BY part_col) - 1 AS N
,ROW_NUMBER() OVER (PARTITION BY part_col ORDER BY data_col) - 1 AS rowno
,MIN(data_col) OVER (PARTITION BY part_col ORDER BY data_col ROWS BETWEEN 1 FOLLOWING AND 1 FOLLOWING) - data_col AS diff
,CASE
WHEN rowno = CAST(N * 0.25 AS INT)
THEN data_col +(((N * 0.25) MOD 1) * diff)
END AS pc25
,CASE
WHEN rowno = CAST(N * 0.50 AS INT)
THEN data_col +(((N * 0.50) MOD 1) * diff)
END AS pc50
,CASE
WHEN rowno = CAST(N * 0.75 AS INT)
THEN data_col +(((N * 0.75) MOD 1) * diff)
END AS pc75
FROM tab
QUALIFY rowno = CAST(N * 0.25 AS INT)
OR rowno = CAST(N * 0.50 AS INT)
OR rowno = CAST(N * 0.75 AS INT)
) AS dt
QUALIFY ROW_NUMBER() OVER (PARTITION BY part_col ORDER BY part_col) = 1