查询:计算传入的多列值的中位数
Query to calculate the median of values passed in from multiple columns
我正在将 SAS 项目转换为 T-SQL,需要计算中位数。
在 SAS 中可以使用 MEDIAN 函数(例如:SELECT MEDIAN(col1, col2, col3, col4) FROM myTable),但 SQL Server 中不存在这个函数。
我已经研究并找到了一些很好的例子来计算 T-SQL 中的中位数,但我不知道如何在我的查询中使用它,如下所示:
-- Shape of the target query: one output row per row of #tb_test.
-- The literal 10 is only a stand-in; the real value should come from a
-- per-row median calculation.
SELECT
    'test' AS colTest,
    CASE
        WHEN src.c1 < 0 THEN (10)
        ELSE 0
    END AS myMedian
FROM #tb_test AS src
我一直在尝试下面的例子,得到了这个:
-- Sample fixture: a temp table with an id and four integer columns (c1..c4)
-- whose per-row median is computed by the queries in this document.
CREATE TABLE #tb_test
(
    id INT,
    c1 INT,
    c2 INT,
    c3 INT,
    c4 INT
);

-- Explicit column lists: the inserts do not depend on the physical column order.
INSERT INTO #tb_test (id, c1, c2, c3, c4) VALUES (1,  2, 4, 6, 8);
INSERT INTO #tb_test (id, c1, c2, c3, c4) VALUES (2, -1, 3, 5, 7);

-- Show the fixture contents.
SELECT id, c1, c2, c3, c4
FROM #tb_test;
table是:
|id| c1| c2| c3| c4|
+--+---+---+---+---+
|1 | 2 | 4 | 6 | 8 |
|2 |-1 | 3 | 5 | 7 |
据我计算这些列的中位数是:
-- getMedian: median across c1..c4 for each id.
-- Step 1 (unpivoted): turn the four columns of every row into
--   (id, col_name, val) rows.
-- Step 2: take the continuous 50th percentile of val within each id.
WITH unpivoted AS
(
    SELECT
        u.id,
        u.col_name,
        u.val
    FROM
    (
        SELECT id, c1, c2, c3, c4
        FROM #tb_test
    ) AS src
    UNPIVOT
    (
        val FOR col_name IN (c1, c2, c3, c4)
    ) AS u
)
-- PERCENTILE_CONT is used as a window function, so it repeats the median on
-- every unpivoted row; DISTINCT collapses the repeats.
-- Note: id is not selected, so ids sharing the same median collapse to one row.
SELECT DISTINCT
    PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY val)
        OVER (PARTITION BY id) AS Median
FROM unpivoted
这导致:
|Median|
+------+
|4 |
|5 |
我试图在最终查询中包含此 getMedian,但没有成功。
所需的最终查询将是:
-- Desired final query (template only): the bracketed text in the THEN branch
-- is a placeholder for the median expression, so this does not run as written.
SELECT
'test' AS colTest,
CASE
WHEN c1 < 0
THEN ([is it possible to use the getMedian query here?])
ELSE 0
END AS myMedian
FROM
#tb_test
及其各自期望的结果:
|colTest | myMedian |
+--------+----------+
|test | 4 |
|test | 5 |
有人知道我该怎么做吗?
提前致谢。
看来你挺接近的,我简单加了个join就得到了下面的,是你想要的吗?
-- Median of c1..c4 per id, joined back onto the rows of #tb_test.
WITH cte AS
(
    -- One (id, Names, Results) row per column value of each source row.
    SELECT
        id, Names, Results
    FROM
        (SELECT id, c1, c2, c3, c4
         FROM #tb_test) p
    UNPIVOT
        (Results FOR Names IN (c1, c2, c3, c4)
    ) AS UNPIVOT_tb_test
),
medians AS
(
    -- PERCENTILE_CONT is a window function here and repeats the median on
    -- every unpivoted row; DISTINCT on (id, Median) leaves one row per id.
    SELECT DISTINCT
        id,
        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY Results)
            OVER (PARTITION BY id) AS Median
    FROM cte
)
SELECT
    t1.*,
    t2.Median -- add here whatever condition you wish, e.g. case c1<0, etc.
FROM #tb_test AS t1
INNER JOIN medians AS t2
    ON t1.id = t2.id;
该功能确实存在,只是需要借助 APPLY:
-- Per-row median of c1..c4 via CROSS APPLY over an inline VALUES list.
SELECT
    t.*,
    m.median
FROM #tb_test AS t
CROSS APPLY
(
    -- PERCENTILE_CONT ... OVER () yields the same median on each of the four
    -- VALUES rows; TOP (1) keeps a single copy.
    SELECT TOP (1)
        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY v.c) OVER () AS median
    FROM (VALUES (t.c1), (t.c2), (t.c3), (t.c4)) AS v(c)
) AS m;
这有点取巧。该函数实际上是窗口函数,而不是聚合函数;SELECT TOP (1) 只是为了确保只返回一行。
根据 @AaronBertrand 的说法,计算中位数最快的方法是使用 OFFSET/FETCH。其做法是:根据值的总数计算出位置,选出一个或两个中间值,然后取平均值。
您可以在 SELECT 的子查询中执行此操作:
-- Per-row median of c1..c4 via OFFSET/FETCH in a scalar subquery.
-- The median is returned only when c1 < 0; otherwise the column is 0.
-- The literal 4 below is the number of values in the VALUES list.
-- NOTE(review): NULLs are not filtered out of the VALUES list -- confirm
-- c1..c4 are never NULL.
SELECT
    t.*,
    CASE
        WHEN t.c1 < 0 THEN
        (
            SELECT AVG(mid.val * 1.0)
            FROM
            (
                SELECT vals.val
                FROM (VALUES (t.c1), (t.c2), (t.c3), (t.c4)) AS vals(val)
                ORDER BY vals.val
                OFFSET (4 - 1) / 2 ROWS               -- skip the rows before the middle
                FETCH NEXT 1 + (1 - 4 % 2) ROWS ONLY  -- 1 row for an odd count, 2 for an even count
            ) AS mid
        )
        ELSE 0
    END AS Median
FROM #tb_test AS t
或者您可以将它放在 APPLY 中,然后再从中 SELECT:
-- Per-row median of c1..c4 via OFFSET/FETCH inside CROSS APPLY; the outer
-- SELECT returns it only when c1 < 0, otherwise 0.
SELECT
    t.*,
    Median = CASE WHEN t.c1 < 0 THEN m.Median ELSE 0 END
FROM #tb_test AS t
CROSS APPLY (
    -- The aggregate carries the Median alias so the outer query can
    -- reference m.Median; the inner derived table exposes column n.
    SELECT Median = AVG(mid.n * 1.0)
    FROM (
        SELECT v.n
        FROM (VALUES (t.c1), (t.c2), (t.c3), (t.c4)) AS v(n)
        ORDER BY v.n
        OFFSET (4 - 1) / 2 ROWS              -- 4 is the number of values
        FETCH NEXT 1 + (1 - 4 % 2) ROWS ONLY -- again 4
    ) AS mid
) AS m;
我正在将 SAS 项目转换为 T-SQL,需要计算中位数。
在 SAS 中可以使用 MEDIAN 函数(例如:SELECT MEDIAN(col1, col2, col3, col4) FROM myTable),但 SQL Server 中不存在这个函数。
我已经研究并找到了一些很好的例子来计算 T-SQL 中的中位数,但我不知道如何在我的查询中使用它,如下所示:
-- Shape of the target query: one output row per row of #tb_test.
-- The literal 10 is only a stand-in; the real value should come from a
-- per-row median calculation.
SELECT
    'test' AS colTest,
    CASE
        WHEN src.c1 < 0 THEN (10)
        ELSE 0
    END AS myMedian
FROM #tb_test AS src
我一直在尝试下面的例子,得到了这个:
-- Sample fixture: a temp table with an id and four integer columns (c1..c4)
-- whose per-row median is computed by the queries in this document.
CREATE TABLE #tb_test
(
    id INT,
    c1 INT,
    c2 INT,
    c3 INT,
    c4 INT
);

-- Explicit column lists: the inserts do not depend on the physical column order.
INSERT INTO #tb_test (id, c1, c2, c3, c4) VALUES (1,  2, 4, 6, 8);
INSERT INTO #tb_test (id, c1, c2, c3, c4) VALUES (2, -1, 3, 5, 7);

-- Show the fixture contents.
SELECT id, c1, c2, c3, c4
FROM #tb_test;
table是:
|id| c1| c2| c3| c4|
+--+---+---+---+---+
|1 | 2 | 4 | 6 | 8 |
|2 |-1 | 3 | 5 | 7 |
据我计算这些列的中位数是:
-- getMedian: median across c1..c4 for each id.
-- Step 1 (unpivoted): turn the four columns of every row into
--   (id, col_name, val) rows.
-- Step 2: take the continuous 50th percentile of val within each id.
WITH unpivoted AS
(
    SELECT
        u.id,
        u.col_name,
        u.val
    FROM
    (
        SELECT id, c1, c2, c3, c4
        FROM #tb_test
    ) AS src
    UNPIVOT
    (
        val FOR col_name IN (c1, c2, c3, c4)
    ) AS u
)
-- PERCENTILE_CONT is used as a window function, so it repeats the median on
-- every unpivoted row; DISTINCT collapses the repeats.
-- Note: id is not selected, so ids sharing the same median collapse to one row.
SELECT DISTINCT
    PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY val)
        OVER (PARTITION BY id) AS Median
FROM unpivoted
这导致:
|Median|
+------+
|4 |
|5 |
我试图在最终查询中包含此 getMedian,但没有成功。 所需的最终查询将是:
-- Desired final query (template only): the bracketed text in the THEN branch
-- is a placeholder for the median expression, so this does not run as written.
SELECT
'test' AS colTest,
CASE
WHEN c1 < 0
THEN ([is it possible to use the getMedian query here?])
ELSE 0
END AS myMedian
FROM
#tb_test
及其各自期望的结果:
|colTest | myMedian |
+--------+----------+
|test | 4 |
|test | 5 |
有人知道我该怎么做吗?
提前致谢。
看来你挺接近的,我简单加了个join就得到了下面的,是你想要的吗?
-- Median of c1..c4 per id, joined back onto the rows of #tb_test.
WITH cte AS
(
    -- One (id, Names, Results) row per column value of each source row.
    SELECT
        id, Names, Results
    FROM
        (SELECT id, c1, c2, c3, c4
         FROM #tb_test) p
    UNPIVOT
        (Results FOR Names IN (c1, c2, c3, c4)
    ) AS UNPIVOT_tb_test
),
medians AS
(
    -- PERCENTILE_CONT is a window function here and repeats the median on
    -- every unpivoted row; DISTINCT on (id, Median) leaves one row per id.
    SELECT DISTINCT
        id,
        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY Results)
            OVER (PARTITION BY id) AS Median
    FROM cte
)
SELECT
    t1.*,
    t2.Median -- add here whatever condition you wish, e.g. case c1<0, etc.
FROM #tb_test AS t1
INNER JOIN medians AS t2
    ON t1.id = t2.id;
该功能确实存在,只是需要借助 APPLY:
-- Per-row median of c1..c4 via CROSS APPLY over an inline VALUES list.
SELECT
    t.*,
    m.median
FROM #tb_test AS t
CROSS APPLY
(
    -- PERCENTILE_CONT ... OVER () yields the same median on each of the four
    -- VALUES rows; TOP (1) keeps a single copy.
    SELECT TOP (1)
        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY v.c) OVER () AS median
    FROM (VALUES (t.c1), (t.c2), (t.c3), (t.c4)) AS v(c)
) AS m;
这有点取巧。该函数实际上是窗口函数,而不是聚合函数;SELECT TOP (1) 只是为了确保只返回一行。
根据 @AaronBertrand 的说法,计算中位数最快的方法是使用 OFFSET/FETCH。其做法是:根据值的总数计算出位置,选出一个或两个中间值,然后取平均值。
您可以在 SELECT 的子查询中执行此操作:
-- Per-row median of c1..c4 via OFFSET/FETCH in a scalar subquery.
-- The median is returned only when c1 < 0; otherwise the column is 0.
-- The literal 4 below is the number of values in the VALUES list.
-- NOTE(review): NULLs are not filtered out of the VALUES list -- confirm
-- c1..c4 are never NULL.
SELECT
    t.*,
    CASE
        WHEN t.c1 < 0 THEN
        (
            SELECT AVG(mid.val * 1.0)
            FROM
            (
                SELECT vals.val
                FROM (VALUES (t.c1), (t.c2), (t.c3), (t.c4)) AS vals(val)
                ORDER BY vals.val
                OFFSET (4 - 1) / 2 ROWS               -- skip the rows before the middle
                FETCH NEXT 1 + (1 - 4 % 2) ROWS ONLY  -- 1 row for an odd count, 2 for an even count
            ) AS mid
        )
        ELSE 0
    END AS Median
FROM #tb_test AS t
或者您可以将它放在 APPLY 中,然后再从中 SELECT:
-- Per-row median of c1..c4 via OFFSET/FETCH inside CROSS APPLY; the outer
-- SELECT returns it only when c1 < 0, otherwise 0.
SELECT
    t.*,
    Median = CASE WHEN t.c1 < 0 THEN m.Median ELSE 0 END
FROM #tb_test AS t
CROSS APPLY (
    -- The aggregate carries the Median alias so the outer query can
    -- reference m.Median; the inner derived table exposes column n.
    SELECT Median = AVG(mid.n * 1.0)
    FROM (
        SELECT v.n
        FROM (VALUES (t.c1), (t.c2), (t.c3), (t.c4)) AS v(n)
        ORDER BY v.n
        OFFSET (4 - 1) / 2 ROWS              -- 4 is the number of values
        FETCH NEXT 1 + (1 - 4 % 2) ROWS ONLY -- again 4
    ) AS mid
) AS m;