PostgreSQL - 将具有多个属性的多行合并为一行?
PostgreSQL - Combining multiple rows with several attributes into one row?
我有一个 table 这样的:
DATE ID ScoreA ScoreB ScoreC
20180101 001 91 92 25
20180101 002 81 82 35
20180101 003 71 52 45
20180102 001 82 15 66
20180102 002 69 67 77
...
20180131 003 88 65 73
以一个月的数据为例,我想将它们汇总成一份包含各项分数 MAX 和 MIN 的报告,每个 ID 只占一行。就像:
ID ScoreA Date_A ScoreB Date_B ...
001 MAX(ScoreA) MAX(ScoreA).DATE MAX(ScoreB) MAX(ScoreB).DATE ...
002 MAX(ScoreA) MAX(ScoreA).DATE MAX(ScoreB) MAX(ScoreB).DATE ...
003 MAX(ScoreA) MAX(ScoreA).DATE MAX(ScoreB) MAX(ScoreB).DATE ...
其中MAX(ScoreA).DATE
表示相应的MAX或MIN分数出现时的DATE
(如果MAX分数出现在多个日期,则随机选择一个)
不像常见的合并行的情况,它同时涉及多个列。因为会有 MANY ID
s 和 HUNDREDS of Score
s (我的意思是有 ScoreA
ScoreB
... ScoreZ
... Score1
Score2
... Score100
...),我希望避免使用消耗操作,例如JOIN
table。那么有什么好主意吗?
如果您想避免连接,我会提供这样的结构
-- Per-ID report of the maximum of each score column together with the date
-- on which that maximum occurred. The source table and the date range are
-- placeholders ("...") to be filled in by the reader.
WITH cte AS (
    -- Rank every row inside its ID once per score column; rank 1 is the row
    -- holding the maximum. row_number() (not rank()) guarantees exactly one
    -- rank-1 row per ID even when the maximum occurs on several dates.
    -- NULLS LAST is required: in PostgreSQL a DESC sort puts NULLs first by
    -- default, so a NULL score would otherwise be picked as the "maximum".
    SELECT
        DATE,
        ID,
        ScoreA,
        ScoreB,
        ScoreC,
        row_number() OVER (PARTITION BY ID ORDER BY ScoreA DESC NULLS LAST) AS rnA,
        row_number() OVER (PARTITION BY ID ORDER BY ScoreB DESC NULLS LAST) AS rnB,
        row_number() OVER (PARTITION BY ID ORDER BY ScoreC DESC NULLS LAST) AS rnC
    FROM ...
    WHERE DATE BETWEEN ... AND ...
), ids AS (
    -- One row per ID; drives the final result set.
    SELECT DISTINCT ID FROM cte
)
SELECT
    ID,
    -- Each scalar subquery returns exactly one row because rnX = 1 is unique per ID.
    (SELECT ScoreA FROM cte t2 WHERE t2.ID = t.ID AND rnA = 1) AS ScoreA,
    (SELECT DATE   FROM cte t2 WHERE t2.ID = t.ID AND rnA = 1) AS Date_A,
    (SELECT ScoreB FROM cte t2 WHERE t2.ID = t.ID AND rnB = 1) AS ScoreB,
    (SELECT DATE   FROM cte t2 WHERE t2.ID = t.ID AND rnB = 1) AS Date_B,
    (SELECT ScoreC FROM cte t2 WHERE t2.ID = t.ID AND rnC = 1) AS ScoreC,
    (SELECT DATE   FROM cte t2 WHERE t2.ID = t.ID AND rnC = 1) AS Date_C
FROM ids t
当您需要 max/min 值的日期或其他属性时,使用行编号而不是聚合函数是合理的:row_number() over (...) as rn
后跟条件 rn = 1
UPD
既然 @TaurusDang 想要代码生成,我的解决方案就是让 postgres 完成几乎所有的工作:
-- Code generator: emits one SQL fragment per score column, to be pasted into
-- the row_number()-based report query. Replace 'your_schema' / 'your_table'.
WITH cols AS (
    SELECT column_name, ordinal_position
    FROM information_schema.columns
    WHERE table_schema = 'your_schema'
      AND table_name = 'your_table'
      -- Unquoted identifiers are stored lower-cased in the catalog, so a
      -- case-sensitive LIKE 'Score%' would match nothing for such tables.
      AND column_name ILIKE 'score%'
), fragments AS (
    -- first part: rows for cte subquery
    -- quote_ident() keeps the generated SQL valid for mixed-case / quoted column names.
    SELECT 1 AS part,
           ordinal_position,
           ',row_number() over (partition by ID order by ' || quote_ident(column_name)
               || ' desc nulls last) ' || quote_ident('rn' || column_name) AS fragment
    FROM cols
    UNION ALL
    -- second part: rows for final query
    SELECT 2 AS part,
           ordinal_position,
           ',(SELECT ' || quote_ident(column_name) || ' FROM cte t2 WHERE t2.ID = t.ID AND '
               || quote_ident('rn' || column_name) || ' = 1) ' || quote_ident(column_name)
               || ', (SELECT DATE FROM cte t2 WHERE t2.ID = t.ID AND '
               || quote_ident('rn' || column_name) || ' = 1) ' || quote_ident('date_' || column_name) AS fragment
    FROM cols
)
-- UNION ALL alone does not guarantee row order; sort so that all first-part
-- rows come before the second-part rows, each in table column order.
SELECT fragment
FROM fragments
ORDER BY part, ordinal_position
只需将生成的行复制到初始查询中:前半部分复制到 cte,下半部分复制到主查询
试试这个
-- Per-id maximum of each score plus one date on which that maximum occurred.
with max_score as (
    -- One row per id holding the maximum of every score column.
    select
        id,
        max(ScoreA) as max_ScoreA,
        max(ScoreB) as max_ScoreB,
        max(ScoreC) as max_ScoreC
    from TABLE_NAME
    group by id
)
select
    cte.id,
    -- A maximum may occur on several dates, so each join can match more than
    -- one row; min(DATE) collapses those ties to a single (earliest) date and
    -- the GROUP BY keeps the result at exactly one row per id.
    cte.max_ScoreA, min(tbl_a.DATE) as Date_A,
    cte.max_ScoreB, min(tbl_b.DATE) as Date_B,
    cte.max_ScoreC, min(tbl_c.DATE) as Date_C
from
    max_score cte
    join TABLE_NAME tbl_a
        on cte.id = tbl_a.id
        and cte.max_ScoreA = tbl_a.ScoreA
    join TABLE_NAME tbl_b
        on cte.id = tbl_b.id
        and cte.max_ScoreB = tbl_b.ScoreB
    join TABLE_NAME tbl_c
        on cte.id = tbl_c.id
        and cte.max_ScoreC = tbl_c.ScoreC
group by
    cte.id, cte.max_ScoreA, cte.max_ScoreB, cte.max_ScoreC
order by cte.id
这是另一个代码示例,将为您提供所需的所有数据:
-- Per-id minimum and maximum of each score column, with a date on which each
-- extreme occurred. One subquery per score column, joined on id.
--
-- Every window uses an explicit full-partition frame. With the default frame
-- (which ends at the current row's peers) last_value() only sees rows up to
-- the current one, so it would return the partition maximum only on the last
-- row of the partition.
-- NOTE: ascending order sorts NULLs last, so a NULL score in a partition makes
-- the *_max columns NULL; filter NULL scores out first if that is unwanted.
select
    id,
    a_min, a_max, a_min_d, a_max_d,
    b_min, b_max, b_min_d, b_max_d,
    c_min, c_max, c_min_d, c_max_d
from (select
        distinct on (id) id,
        first_value(scorea) over w as a_min,
        last_value(scorea) over w as a_max,
        first_value(date) over w as a_min_d,
        last_value(date) over w as a_max_d
      from the_table
      window w as (partition by id order by scorea
                   rows between unbounded preceding and unbounded following)
      -- all rows of a partition now carry identical values; keep one per id
      order by id) a
join (select
        distinct on (id) id,
        first_value(scoreb) over w as b_min,
        last_value(scoreb) over w as b_max,
        first_value(date) over w as b_min_d,
        last_value(date) over w as b_max_d
      from the_table
      window w as (partition by id order by scoreb
                   rows between unbounded preceding and unbounded following)
      order by id) b using(id)
join (select
        distinct on (id) id,
        first_value(scorec) over w as c_min,
        last_value(scorec) over w as c_max,
        first_value(date) over w as c_min_d,
        last_value(date) over w as c_max_d
      from the_table
      window w as (partition by id order by scorec
                   rows between unbounded preceding and unbounded following)
      order by id) c using(id)
请注意,这里有 3 个单独的子查询,每个分数列一个。这里用到了一些关于 windowing-functions and partitions 的技巧(magic),值得进一步阅读了解。其中一个棘手之处在于:如果把它们放在同一个查询中,各个分区会相互干扰(至少在我的 pg 9.3.22 上是这样)。
我有一个 table 这样的:
DATE ID ScoreA ScoreB ScoreC
20180101 001 91 92 25
20180101 002 81 82 35
20180101 003 71 52 45
20180102 001 82 15 66
20180102 002 69 67 77
...
20180131 003 88 65 73
以一个月的数据为例,我想将它们汇总成一份包含各项分数 MAX 和 MIN 的报告,每个 ID 只占一行。就像:
ID ScoreA Date_A ScoreB Date_B ...
001 MAX(ScoreA) MAX(ScoreA).DATE MAX(ScoreB) MAX(ScoreB).DATE ...
002 MAX(ScoreA) MAX(ScoreA).DATE MAX(ScoreB) MAX(ScoreB).DATE ...
003 MAX(ScoreA) MAX(ScoreA).DATE MAX(ScoreB) MAX(ScoreB).DATE ...
其中MAX(ScoreA).DATE
表示相应的MAX或MIN分数出现时的DATE
(如果MAX分数出现在多个日期,则随机选择一个)
不像常见的合并行的情况,它同时涉及多个列。因为会有 MANY ID
s 和 HUNDREDS of Score
s (我的意思是有 ScoreA
ScoreB
... ScoreZ
... Score1
Score2
... Score100
...),我希望避免使用消耗操作,例如JOIN
table。那么有什么好主意吗?
如果您想避免连接,我会提供这样的结构
-- Per-ID report of the maximum of each score column together with the date
-- on which that maximum occurred. The source table and the date range are
-- placeholders ("...") to be filled in by the reader.
WITH cte AS (
    -- Rank every row inside its ID once per score column; rank 1 is the row
    -- holding the maximum. row_number() (not rank()) guarantees exactly one
    -- rank-1 row per ID even when the maximum occurs on several dates.
    -- NULLS LAST is required: in PostgreSQL a DESC sort puts NULLs first by
    -- default, so a NULL score would otherwise be picked as the "maximum".
    SELECT
        DATE,
        ID,
        ScoreA,
        ScoreB,
        ScoreC,
        row_number() OVER (PARTITION BY ID ORDER BY ScoreA DESC NULLS LAST) AS rnA,
        row_number() OVER (PARTITION BY ID ORDER BY ScoreB DESC NULLS LAST) AS rnB,
        row_number() OVER (PARTITION BY ID ORDER BY ScoreC DESC NULLS LAST) AS rnC
    FROM ...
    WHERE DATE BETWEEN ... AND ...
), ids AS (
    -- One row per ID; drives the final result set.
    SELECT DISTINCT ID FROM cte
)
SELECT
    ID,
    -- Each scalar subquery returns exactly one row because rnX = 1 is unique per ID.
    (SELECT ScoreA FROM cte t2 WHERE t2.ID = t.ID AND rnA = 1) AS ScoreA,
    (SELECT DATE   FROM cte t2 WHERE t2.ID = t.ID AND rnA = 1) AS Date_A,
    (SELECT ScoreB FROM cte t2 WHERE t2.ID = t.ID AND rnB = 1) AS ScoreB,
    (SELECT DATE   FROM cte t2 WHERE t2.ID = t.ID AND rnB = 1) AS Date_B,
    (SELECT ScoreC FROM cte t2 WHERE t2.ID = t.ID AND rnC = 1) AS ScoreC,
    (SELECT DATE   FROM cte t2 WHERE t2.ID = t.ID AND rnC = 1) AS Date_C
FROM ids t
当您需要 max/min 值的日期或其他属性时,使用行编号而不是聚合函数是合理的:row_number() over (...) as rn
后跟条件 rn = 1
UPD
既然 @TaurusDang 想要代码生成,我的解决方案就是让 postgres 完成几乎所有的工作:
-- Code generator: emits one SQL fragment per score column, to be pasted into
-- the row_number()-based report query. Replace 'your_schema' / 'your_table'.
WITH cols AS (
    SELECT column_name, ordinal_position
    FROM information_schema.columns
    WHERE table_schema = 'your_schema'
      AND table_name = 'your_table'
      -- Unquoted identifiers are stored lower-cased in the catalog, so a
      -- case-sensitive LIKE 'Score%' would match nothing for such tables.
      AND column_name ILIKE 'score%'
), fragments AS (
    -- first part: rows for cte subquery
    -- quote_ident() keeps the generated SQL valid for mixed-case / quoted column names.
    SELECT 1 AS part,
           ordinal_position,
           ',row_number() over (partition by ID order by ' || quote_ident(column_name)
               || ' desc nulls last) ' || quote_ident('rn' || column_name) AS fragment
    FROM cols
    UNION ALL
    -- second part: rows for final query
    SELECT 2 AS part,
           ordinal_position,
           ',(SELECT ' || quote_ident(column_name) || ' FROM cte t2 WHERE t2.ID = t.ID AND '
               || quote_ident('rn' || column_name) || ' = 1) ' || quote_ident(column_name)
               || ', (SELECT DATE FROM cte t2 WHERE t2.ID = t.ID AND '
               || quote_ident('rn' || column_name) || ' = 1) ' || quote_ident('date_' || column_name) AS fragment
    FROM cols
)
-- UNION ALL alone does not guarantee row order; sort so that all first-part
-- rows come before the second-part rows, each in table column order.
SELECT fragment
FROM fragments
ORDER BY part, ordinal_position
只需将生成的行复制到初始查询中:前半部分复制到 cte,下半部分复制到主查询
试试这个
-- Per-id maximum of each score plus one date on which that maximum occurred.
with max_score as (
    -- One row per id holding the maximum of every score column.
    select
        id,
        max(ScoreA) as max_ScoreA,
        max(ScoreB) as max_ScoreB,
        max(ScoreC) as max_ScoreC
    from TABLE_NAME
    group by id
)
select
    cte.id,
    -- A maximum may occur on several dates, so each join can match more than
    -- one row; min(DATE) collapses those ties to a single (earliest) date and
    -- the GROUP BY keeps the result at exactly one row per id.
    cte.max_ScoreA, min(tbl_a.DATE) as Date_A,
    cte.max_ScoreB, min(tbl_b.DATE) as Date_B,
    cte.max_ScoreC, min(tbl_c.DATE) as Date_C
from
    max_score cte
    join TABLE_NAME tbl_a
        on cte.id = tbl_a.id
        and cte.max_ScoreA = tbl_a.ScoreA
    join TABLE_NAME tbl_b
        on cte.id = tbl_b.id
        and cte.max_ScoreB = tbl_b.ScoreB
    join TABLE_NAME tbl_c
        on cte.id = tbl_c.id
        and cte.max_ScoreC = tbl_c.ScoreC
group by
    cte.id, cte.max_ScoreA, cte.max_ScoreB, cte.max_ScoreC
order by cte.id
这是另一个代码示例,将为您提供所需的所有数据:
-- Per-id minimum and maximum of each score column, with a date on which each
-- extreme occurred. One subquery per score column, joined on id.
--
-- Every window uses an explicit full-partition frame. With the default frame
-- (which ends at the current row's peers) last_value() only sees rows up to
-- the current one, so it would return the partition maximum only on the last
-- row of the partition.
-- NOTE: ascending order sorts NULLs last, so a NULL score in a partition makes
-- the *_max columns NULL; filter NULL scores out first if that is unwanted.
select
    id,
    a_min, a_max, a_min_d, a_max_d,
    b_min, b_max, b_min_d, b_max_d,
    c_min, c_max, c_min_d, c_max_d
from (select
        distinct on (id) id,
        first_value(scorea) over w as a_min,
        last_value(scorea) over w as a_max,
        first_value(date) over w as a_min_d,
        last_value(date) over w as a_max_d
      from the_table
      window w as (partition by id order by scorea
                   rows between unbounded preceding and unbounded following)
      -- all rows of a partition now carry identical values; keep one per id
      order by id) a
join (select
        distinct on (id) id,
        first_value(scoreb) over w as b_min,
        last_value(scoreb) over w as b_max,
        first_value(date) over w as b_min_d,
        last_value(date) over w as b_max_d
      from the_table
      window w as (partition by id order by scoreb
                   rows between unbounded preceding and unbounded following)
      order by id) b using(id)
join (select
        distinct on (id) id,
        first_value(scorec) over w as c_min,
        last_value(scorec) over w as c_max,
        first_value(date) over w as c_min_d,
        last_value(date) over w as c_max_d
      from the_table
      window w as (partition by id order by scorec
                   rows between unbounded preceding and unbounded following)
      order by id) c using(id)
请注意,这里有 3 个单独的子查询,每个分数列一个。这里用到了一些关于 windowing-functions and partitions 的技巧(magic),值得进一步阅读了解。其中一个棘手之处在于:如果把它们放在同一个查询中,各个分区会相互干扰(至少在我的 pg 9.3.22 上是这样)。