查询以从每个组中找到第二大值
Query to find second largest value from every group
我有三个表:
project: project_id, project_name
milestone: milestone_id, milestone_name
project_milestone: id, project_id, milestone_id, completed_date
我想从按 project_id 分组的 project_milestone 中取得第二高的 completed_date 及其对应的 milestone_id。也就是说,我想为每个项目找出 completed_date 第二高的那条记录的 milestone_id。正确的查询应该怎么写?
我认为只用 project_milestone 表和 row_number() 就可以实现你想要的结果:
-- Number each project's completed milestones from newest to oldest and keep
-- only the row numbered 2, i.e. the second-latest completed_date per project.
-- No tie-breaker is given, so rows sharing a completed_date are numbered in
-- an arbitrary order.
WITH ranked AS (
    SELECT
        pm.*,
        ROW_NUMBER() OVER (
            PARTITION BY pm.project_id
            ORDER BY pm.completed_date DESC
        ) AS seqnum
    FROM project_milestone AS pm
    WHERE pm.completed_date IS NOT NULL  -- milestones without a completion date are not ranked
)
SELECT ranked.*
FROM ranked
WHERE ranked.seqnum = 2;
如果需要包含所有项目(即使是那些没有两个里程碑的项目),可以使用 left join:
-- List every project together with the milestone that has its second-latest
-- completed_date. Projects with fewer than two completed milestones are kept
-- and show NULL for milestone_id and completed_date.
select p.project_id, pm.milestone_id, pm.completed_date
from project p left join   -- the table is "project" (singular), as in the schema listed in the question
     (select pm.*,
             row_number() over (partition by project_id order by completed_date desc) as seqnum
      from project_milestone pm
      where pm.completed_date is not null
     ) pm
     -- seqnum = 2 stays in ON (not WHERE) so the left join still keeps unmatched projects
     on p.project_id = pm.project_id and pm.seqnum = 2;
使用 LATERAL(PostgreSQL 9.3+)可以获得比窗口函数版本更好的性能。
-- Sample data: the contents of the project table.
SELECT project_id, project_name
FROM project;
project_id | project_name
------------+--------------
1 | Project A
2 | Project B
-- Sample data: the contents of the project_milestone table.
SELECT id, project_id, milestone_id, completed_date
FROM project_milestone;
id | project_id | milestone_id | completed_date
----+------------+--------------+------------------------
1 | 1 | 1 | 2000-01-01 00:00:00+01
2 | 1 | 2 | 2000-01-02 00:00:00+01
3 | 1 | 5 | 2000-01-03 00:00:00+01
4 | 1 | 6 | 2000-01-04 00:00:00+01
5 | 2 | 3 | 2000-02-01 00:00:00+01
6 | 2 | 4 | 2000-02-02 00:00:00+01
7 | 2 | 7 | 2000-02-03 00:00:00+01
8 | 2 | 8 | 2000-02-04 00:00:00+01
-- For each project, fetch the milestone with the second-highest
-- completed_date using a correlated LATERAL subquery.
-- CROSS JOIN LATERAL drops projects whose subquery returns no row, i.e.
-- projects with fewer than two completed milestones.
SELECT
    p.project_id,
    p.project_name,
    second_highest.milestone_id,
    second_highest.completed_date
FROM project AS p
CROSS JOIN LATERAL (
    SELECT pm.milestone_id, pm.completed_date
    FROM project_milestone AS pm
    WHERE pm.project_id = p.project_id
      AND pm.completed_date IS NOT NULL  -- DESC sorts NULLs first in PostgreSQL; exclude them
    ORDER BY pm.completed_date DESC      -- newest first, so OFFSET 1 skips the highest date
    LIMIT 1
    OFFSET 1
) AS second_highest;
project_id | project_name | milestone_id | completed_date
------------+--------------+--------------+------------------------
1 | Project A | 5 | 2000-01-03 00:00:00+01
2 | Project B | 7 | 2000-02-03 00:00:00+01
实现此目的的最简单方法是使用窗口函数。
-- Annotate every project_milestone row with date2: the second-latest
-- completed_date within that row's project. All rows are returned; date2 is
-- NULL when the project has fewer than two non-NULL completion dates.
SELECT
    *,
    nth_value(completed_date, 2) OVER (
        PARTITION BY project_id
        -- PostgreSQL sorts NULLs first under DESC; NULLS LAST keeps rows with
        -- no completion date from occupying the top positions.
        ORDER BY completed_date DESC NULLS LAST
        -- The full-partition frame lets every row see the same 2nd value,
        -- including the first row of the partition.
        RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS date2
FROM project_milestone;
我有三个表:
project: project_id, project_name
milestone: milestone_id, milestone_name
project_milestone: id, project_id, milestone_id, completed_date
我想从按 project_id 分组的 project_milestone 中取得第二高的 completed_date 及其对应的 milestone_id。也就是说,我想为每个项目找出 completed_date 第二高的那条记录的 milestone_id。正确的查询应该怎么写?
我认为只用 project_milestone 表和 row_number() 就可以实现你想要的结果:
-- Number each project's completed milestones from newest to oldest and keep
-- only the row numbered 2, i.e. the second-latest completed_date per project.
-- No tie-breaker is given, so rows sharing a completed_date are numbered in
-- an arbitrary order.
WITH ranked AS (
    SELECT
        pm.*,
        ROW_NUMBER() OVER (
            PARTITION BY pm.project_id
            ORDER BY pm.completed_date DESC
        ) AS seqnum
    FROM project_milestone AS pm
    WHERE pm.completed_date IS NOT NULL  -- milestones without a completion date are not ranked
)
SELECT ranked.*
FROM ranked
WHERE ranked.seqnum = 2;
如果需要包含所有项目(即使是那些没有两个里程碑的项目),可以使用 left join:
-- List every project together with the milestone that has its second-latest
-- completed_date. Projects with fewer than two completed milestones are kept
-- and show NULL for milestone_id and completed_date.
select p.project_id, pm.milestone_id, pm.completed_date
from project p left join   -- the table is "project" (singular), as in the schema listed in the question
     (select pm.*,
             row_number() over (partition by project_id order by completed_date desc) as seqnum
      from project_milestone pm
      where pm.completed_date is not null
     ) pm
     -- seqnum = 2 stays in ON (not WHERE) so the left join still keeps unmatched projects
     on p.project_id = pm.project_id and pm.seqnum = 2;
使用 LATERAL(PostgreSQL 9.3+)可以获得比窗口函数版本更好的性能。
-- Sample data: the contents of the project table.
SELECT project_id, project_name
FROM project;
project_id | project_name
------------+--------------
1 | Project A
2 | Project B
-- Sample data: the contents of the project_milestone table.
SELECT id, project_id, milestone_id, completed_date
FROM project_milestone;
id | project_id | milestone_id | completed_date
----+------------+--------------+------------------------
1 | 1 | 1 | 2000-01-01 00:00:00+01
2 | 1 | 2 | 2000-01-02 00:00:00+01
3 | 1 | 5 | 2000-01-03 00:00:00+01
4 | 1 | 6 | 2000-01-04 00:00:00+01
5 | 2 | 3 | 2000-02-01 00:00:00+01
6 | 2 | 4 | 2000-02-02 00:00:00+01
7 | 2 | 7 | 2000-02-03 00:00:00+01
8 | 2 | 8 | 2000-02-04 00:00:00+01
-- For each project, fetch the milestone with the second-highest
-- completed_date using a correlated LATERAL subquery.
-- CROSS JOIN LATERAL drops projects whose subquery returns no row, i.e.
-- projects with fewer than two completed milestones.
SELECT
    p.project_id,
    p.project_name,
    second_highest.milestone_id,
    second_highest.completed_date
FROM project AS p
CROSS JOIN LATERAL (
    SELECT pm.milestone_id, pm.completed_date
    FROM project_milestone AS pm
    WHERE pm.project_id = p.project_id
      AND pm.completed_date IS NOT NULL  -- DESC sorts NULLs first in PostgreSQL; exclude them
    ORDER BY pm.completed_date DESC      -- newest first, so OFFSET 1 skips the highest date
    LIMIT 1
    OFFSET 1
) AS second_highest;
project_id | project_name | milestone_id | completed_date
------------+--------------+--------------+------------------------
1 | Project A | 5 | 2000-01-03 00:00:00+01
2 | Project B | 7 | 2000-02-03 00:00:00+01
实现此目的的最简单方法是使用窗口函数。
-- Annotate every project_milestone row with date2: the second-latest
-- completed_date within that row's project. All rows are returned; date2 is
-- NULL when the project has fewer than two non-NULL completion dates.
SELECT
    *,
    nth_value(completed_date, 2) OVER (
        PARTITION BY project_id
        -- PostgreSQL sorts NULLs first under DESC; NULLS LAST keeps rows with
        -- no completion date from occupying the top positions.
        ORDER BY completed_date DESC NULLS LAST
        -- The full-partition frame lets every row see the same 2nd value,
        -- including the first row of the partition.
        RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS date2
FROM project_milestone;