Vertica 中三列(每行)的中位数 SQL
Median of three columns (for each row) in Vertica SQL
我的表如下所示:
| 产品 | 长度 | 宽度 | 高度 |
|---|---|---|---|
| A | 34 | 22 | 10 |
| B | 40 | 2 | 12 |
我想得到每个产品的最长边、第二长边和第三长边。
对于最长边和第三长边,这很容易:我直接使用 GREATEST() 和 LEAST() 即可。
目前,第二长边我是通过下面的子查询来计算的,但这会让代码变得更长,也不够简洁。
-- Longest, second-longest and third-longest side per product.
-- The derived table computes the two extremes; the outer query derives the
-- middle value as (sum of the three sides) - longest - shortest.
SELECT
    product,
    longest_side,
    third_longest_side,
    width + height + length - longest_side - third_longest_side AS second_longest_side
FROM (
    SELECT
        product,
        -- The raw sides must be projected here; otherwise the outer query
        -- cannot reference width, height and length in the subtraction.
        width,
        height,
        length,
        GREATEST(width, height, length) AS longest_side,
        LEAST(width, height, length) AS third_longest_side
    -- "..." is the original's placeholder (presumably the elided FROM clause)
    ...
) AS sides
MEDIAN() 函数本可以解决我的问题,但它不接受多列作为参数。遗憾的是,下面的示例无法运行。
您知道有什么类似的函数,可以让我不使用子查询就做到这一点吗?
-- Desired (but non-working) form, kept only to illustrate the intent:
-- MEDIAN() does not accept more than one column as its value, so this
-- statement does not run as written.
SELECT
product,
GREATEST(width, height, length) AS longest_side,
LEAST(width, height, length) AS third_longest_side,
-- not valid: MEDIAN() is called here with three columns
MEDIAN(width, height, length) AS second_longest_side
...
您需要使用显式的逻辑。如果三个值互不相同,可以这样写:
-- Per-row longest, middle and shortest side using explicit logic.
-- The middle_side CASE picks the first column that is neither the row's
-- minimum nor its maximum. It is only correct when width, height and length
-- are pairwise distinct and non-NULL: with duplicates (e.g. 5, 5, 3) every
-- column equals one of the extremes and the ELSE branch returns length.
select t.*,
       greatest(width, height, length) as longest_side,
       (case when width not in (least(width, height, length), greatest(width, height, length))
             then width
             when height not in (least(width, height, length), greatest(width, height, length))
             then height
             else length
        end) as middle_side,  -- comma was missing here, which made the select list invalid
       least(width, height, length) as shortest_side
-- NOTE(review): the FROM clause is not part of this snippet; "t" is presumably
-- the alias of the source table.
如果任何值可能为 NULL,或者存在重复值,情况会变得更加复杂。
先把数据垂直化(列转行),再使用 OLAP 函数,最后再次 GROUP BY:
-- Row-wise longest / shortest / middle side: turn the three side columns into
-- rows, compute MAX / MIN / MEDIAN per product with analytic functions, then
-- collapse back to one row per product.
-- your input
WITH
indata(prd, l, w, h) AS (
            SELECT 'A', 34, 22, 10
  UNION ALL SELECT 'B', 40,  2, 12
)
,
-- need three key rows, one per side column ....
t(t) AS (
            SELECT 'l'
  UNION ALL SELECT 'w'
  UNION ALL SELECT 'h'
)
,
-- one row per (product, side key): the CROSS JOIN repeats every input row for
-- each key and the CASE picks the side value that matches the key
vertical AS (
  SELECT
    prd
  , t
  , CASE t
      WHEN 'l' THEN l
      WHEN 'w' THEN w
      WHEN 'h' THEN h
    END AS v
  FROM indata CROSS JOIN t
)
-- test query ...
-- SELECT * FROM vertical
-- out  prd | t | v
-- out -----+---+----
-- out  A   | l | 34
-- out  A   | w | 22
-- out  A   | h | 10
-- out  B   | l | 40
-- out  B   | w |  2
-- out  B   | h | 12
,
-- analytic step: every row of a product carries that product's max, min and
-- median side value (columns listed explicitly instead of SELECT *)
olap AS (
  SELECT
    prd
  , t
  , v
  , MAX(v)    OVER w AS longest_side
  , MIN(v)    OVER w AS shortest_side
  , MEDIAN(v) OVER w AS medium_side
  FROM vertical
  WINDOW w AS (PARTITION BY prd)
)
-- test query ...
-- SELECT * FROM olap;
-- out  prd | t | v  | longest_side | shortest_side | medium_side
-- out -----+---+----+--------------+---------------+-------------
-- out  B   | w |  2 |           40 |             2 |          12
-- out  B   | h | 12 |           40 |             2 |          12
-- out  B   | l | 40 |           40 |             2 |          12
-- out  A   | h | 10 |           34 |            10 |          22
-- out  A   | w | 22 |           34 |            10 |          22
-- out  A   | l | 34 |           34 |            10 |          22
-- The three values are constant within a product (see the output above), so
-- MAX() here only collapses the three identical rows into one.
SELECT
  prd
, MAX(longest_side)  AS longest_side
, MAX(shortest_side) AS shortest_side
, MAX(medium_side)   AS medium_side
FROM olap
GROUP BY prd   -- grouped by name rather than by position
ORDER BY prd;  -- makes the row order shown below reproducible
-- out  prd | longest_side | shortest_side | medium_side
-- out -----+--------------+---------------+-------------
-- out  A   |           34 |            10 |          22
-- out  B   |           40 |             2 |          12
我的表如下所示:
| 产品 | 长度 | 宽度 | 高度 |
|---|---|---|---|
| A | 34 | 22 | 10 |
| B | 40 | 2 | 12 |
我想得到每个产品的最长边、第二长边和第三长边。对于最长边和第三长边,这很容易:我直接使用 GREATEST() 和 LEAST() 即可。目前,第二长边我是通过下面的子查询来计算的,但这会让代码变得更长,也不够简洁。
-- Longest, second-longest and third-longest side per product.
-- The derived table computes the two extremes; the outer query derives the
-- middle value as (sum of the three sides) - longest - shortest.
SELECT
    product,
    longest_side,
    third_longest_side,
    width + height + length - longest_side - third_longest_side AS second_longest_side
FROM (
    SELECT
        product,
        -- The raw sides must be projected here; otherwise the outer query
        -- cannot reference width, height and length in the subtraction.
        width,
        height,
        length,
        GREATEST(width, height, length) AS longest_side,
        LEAST(width, height, length) AS third_longest_side
    -- "..." is the original's placeholder (presumably the elided FROM clause)
    ...
) AS sides
MEDIAN() 函数本可以解决我的问题,但它不接受多列作为参数。遗憾的是,下面的示例无法运行。您知道有什么类似的函数,可以让我不使用子查询就做到这一点吗?
-- Desired (but non-working) form, kept only to illustrate the intent:
-- MEDIAN() does not accept more than one column as its value, so this
-- statement does not run as written.
SELECT
product,
GREATEST(width, height, length) AS longest_side,
LEAST(width, height, length) AS third_longest_side,
-- not valid: MEDIAN() is called here with three columns
MEDIAN(width, height, length) AS second_longest_side
...
您需要使用显式的逻辑。如果三个值互不相同,可以这样写:
-- Per-row longest, middle and shortest side using explicit logic.
-- The middle_side CASE picks the first column that is neither the row's
-- minimum nor its maximum. It is only correct when width, height and length
-- are pairwise distinct and non-NULL: with duplicates (e.g. 5, 5, 3) every
-- column equals one of the extremes and the ELSE branch returns length.
select t.*,
       greatest(width, height, length) as longest_side,
       (case when width not in (least(width, height, length), greatest(width, height, length))
             then width
             when height not in (least(width, height, length), greatest(width, height, length))
             then height
             else length
        end) as middle_side,  -- comma was missing here, which made the select list invalid
       least(width, height, length) as shortest_side
-- NOTE(review): the FROM clause is not part of this snippet; "t" is presumably
-- the alias of the source table.
如果任何值可能为 NULL,或者存在重复值,情况会变得更加复杂。
先把数据垂直化(列转行),再使用 OLAP 函数,最后再次 GROUP BY:
-- Row-wise longest / shortest / middle side: turn the three side columns into
-- rows, compute MAX / MIN / MEDIAN per product with analytic functions, then
-- collapse back to one row per product.
-- your input
WITH
indata(prd, l, w, h) AS (
            SELECT 'A', 34, 22, 10
  UNION ALL SELECT 'B', 40,  2, 12
)
,
-- need three key rows, one per side column ....
t(t) AS (
            SELECT 'l'
  UNION ALL SELECT 'w'
  UNION ALL SELECT 'h'
)
,
-- one row per (product, side key): the CROSS JOIN repeats every input row for
-- each key and the CASE picks the side value that matches the key
vertical AS (
  SELECT
    prd
  , t
  , CASE t
      WHEN 'l' THEN l
      WHEN 'w' THEN w
      WHEN 'h' THEN h
    END AS v
  FROM indata CROSS JOIN t
)
-- test query ...
-- SELECT * FROM vertical
-- out  prd | t | v
-- out -----+---+----
-- out  A   | l | 34
-- out  A   | w | 22
-- out  A   | h | 10
-- out  B   | l | 40
-- out  B   | w |  2
-- out  B   | h | 12
,
-- analytic step: every row of a product carries that product's max, min and
-- median side value (columns listed explicitly instead of SELECT *)
olap AS (
  SELECT
    prd
  , t
  , v
  , MAX(v)    OVER w AS longest_side
  , MIN(v)    OVER w AS shortest_side
  , MEDIAN(v) OVER w AS medium_side
  FROM vertical
  WINDOW w AS (PARTITION BY prd)
)
-- test query ...
-- SELECT * FROM olap;
-- out  prd | t | v  | longest_side | shortest_side | medium_side
-- out -----+---+----+--------------+---------------+-------------
-- out  B   | w |  2 |           40 |             2 |          12
-- out  B   | h | 12 |           40 |             2 |          12
-- out  B   | l | 40 |           40 |             2 |          12
-- out  A   | h | 10 |           34 |            10 |          22
-- out  A   | w | 22 |           34 |            10 |          22
-- out  A   | l | 34 |           34 |            10 |          22
-- The three values are constant within a product (see the output above), so
-- MAX() here only collapses the three identical rows into one.
SELECT
  prd
, MAX(longest_side)  AS longest_side
, MAX(shortest_side) AS shortest_side
, MAX(medium_side)   AS medium_side
FROM olap
GROUP BY prd   -- grouped by name rather than by position
ORDER BY prd;  -- makes the row order shown below reproducible
-- out  prd | longest_side | shortest_side | medium_side
-- out -----+--------------+---------------+-------------
-- out  A   |           34 |            10 |          22
-- out  B   |           40 |             2 |          12