Postgresql OVER 和 GROUP
Postgresql OVER and GROUP
我的分组有问题
我有一个 table(里面有很多东西,但不相关)看起来像:
id user
0 1
1 1
2 1
3 2
4 2
5 2
6 1
7 1
我正在尝试获取以下值:
user start end
1 0 2
2 3 5
1 6 7
基本上,我需要每一段连续出现的同一用户的第一个和最后一个 id,同时不打乱顺序。
我知道我需要使用 OVER (PARTITION BY ...),但我从未使用过它,也不确定如何构建此查询。
如果我使用 "partition by user",它会忽略顺序;如果我使用 "partition by id, user",返回的结果同样不对。
我尝试过的例子(这还不是在求我最终需要的结果,只是一个中间步骤;一旦弄清楚 "over" 部分该怎么写,我就知道接下来如何去做):
SELECT user, count(user) over (partition by user):
user count
1 5
1 5
1 5
2 3
2 3
2 3
1 5
1 5
SELECT user, count(user) over (partition by id, user):
user count
1 1
1 1
1 1
2 1
2 1
2 1
1 1
1 1
要获取开始和结束的 id,可以使用:
-- Returns, for every row, the smallest and largest id of that row's user.
-- "user" is double-quoted because unquoted USER is a reserved word in
-- PostgreSQL and evaluates to the current role name instead of the column.
-- Caveat: PARTITION BY "user" places all rows of a user in one partition, so
-- separate runs of the same user are not distinguished from each other.
SELECT
    "user",
    min(id) OVER (PARTITION BY "user") AS "start",
    max(id) OVER (PARTITION BY "user") AS "end"
FROM table_name;
更新
我之前的回答基于错误的前提,因此是错的。为了给出一个正确的、又不与 @vkp 的答案重复的解法,我写了下面这个丑陋的构造:
-- Fixture: the sample rows from the question (id = row position, u = user).
create table so74 as
select *
from (values (0, 1), (1, 1), (2, 1), (3, 2), (4, 2), (5, 2), (6, 1), (7, 1)) t(id, u);

-- Collapse every run of consecutive rows (ordered by id) that share the same u
-- into a single row carrying the run's first and last id.
with marked as (
    -- Flag the first row of each run. lag() yields NULL on the very first row;
    -- IS DISTINCT FROM counts that NULL as a difference, so that row is flagged too.
    select
        id
        , u
        , case when lag(u) over (order by id) is distinct from u then 1 else 0 end as is_run_start
    from so74
)
, numbered as (
    -- The running total of the flags gives each row the ordinal of its run.
    select
        id
        , u
        , sum(is_run_start) over (order by id rows unbounded preceding) as run_no
    from marked
)
select
    u
    , min(id) as min
    , max(id) as max
from numbered
group by u, run_no
-- Runs are numbered in id order, so this keeps them in their original sequence.
order by run_no
;
u | min | max
---+-----+-----
1 | 0 | 2
2 | 3 | 5
1 | 6 | 7
(3 rows)
Time: 0.456 ms
使用行号差值法将同一用户的连续行归为一组,并在遇到新用户时开始一个新组:按 id 排序的全局行号减去按用户分区后的行号,这个差值在同一段连续行内保持不变。此后,使用 group by
获取每个组的开始和结束。
-- One output row per run of consecutive rows (by id) belonging to the same user,
-- with the first and last id of that run.
-- "user" is double-quoted because unquoted USER is a reserved word in
-- PostgreSQL and evaluates to the current role name instead of the column.
SELECT
    "user",
    MIN(id) AS "start",
    MAX(id) AS "end"
FROM (
    SELECT
        "user",
        id,
        -- Overall row number minus per-user row number stays constant while the
        -- same user continues and changes once another user's rows intervene.
        ROW_NUMBER() OVER (ORDER BY id)
            - ROW_NUMBER() OVER (PARTITION BY "user" ORDER BY id) AS grp
    FROM tablename
) AS t
GROUP BY "user", grp
-- GROUP BY alone guarantees no output order; sort runs by where they begin.
ORDER BY MIN(id);
我的分组有问题
我有一个 table(里面有很多东西,但不相关)看起来像:
id user
0 1
1 1
2 1
3 2
4 2
5 2
6 1
7 1
我正在尝试获取以下值:
user start end
1 0 2
2 3 5
1 6 7
基本上,我需要每一段连续出现的同一用户的第一个和最后一个 id,同时不打乱顺序。 我知道我需要使用 OVER (PARTITION BY ...),但我从未使用过它,也不确定如何构建此查询。 如果我使用 "partition by user",它会忽略顺序;如果我使用 "partition by id, user",返回的结果同样不对。
我尝试过的例子(这还不是在求我最终需要的结果,只是一个中间步骤;一旦弄清楚 "over" 部分该怎么写,我就知道接下来如何去做):
SELECT user, count(user) over (partition by user):
user count
1 5
1 5
1 5
2 3
2 3
2 3
1 5
1 5
SELECT user, count(user) over (partition by id, user):
user count
1 1
1 1
1 1
2 1
2 1
2 1
1 1
1 1
要获取开始和结束的 id,可以使用:
-- Returns, for every row, the smallest and largest id of that row's user.
-- "user" is double-quoted because unquoted USER is a reserved word in
-- PostgreSQL and evaluates to the current role name instead of the column.
-- Caveat: PARTITION BY "user" places all rows of a user in one partition, so
-- separate runs of the same user are not distinguished from each other.
SELECT
    "user",
    min(id) OVER (PARTITION BY "user") AS "start",
    max(id) OVER (PARTITION BY "user") AS "end"
FROM table_name;
更新:我之前的回答基于错误的前提,因此是错的。为了给出一个正确的、又不与 @vkp 的答案重复的解法,我写了下面这个丑陋的构造:
-- Fixture: the sample rows from the question (id = row position, u = user).
create table so74 as
select *
from (values (0, 1), (1, 1), (2, 1), (3, 2), (4, 2), (5, 2), (6, 1), (7, 1)) t(id, u);

-- Collapse every run of consecutive rows (ordered by id) that share the same u
-- into a single row carrying the run's first and last id.
with marked as (
    -- Flag the first row of each run. lag() yields NULL on the very first row;
    -- IS DISTINCT FROM counts that NULL as a difference, so that row is flagged too.
    select
        id
        , u
        , case when lag(u) over (order by id) is distinct from u then 1 else 0 end as is_run_start
    from so74
)
, numbered as (
    -- The running total of the flags gives each row the ordinal of its run.
    select
        id
        , u
        , sum(is_run_start) over (order by id rows unbounded preceding) as run_no
    from marked
)
select
    u
    , min(id) as min
    , max(id) as max
from numbered
group by u, run_no
-- Runs are numbered in id order, so this keeps them in their original sequence.
order by run_no
;
u | min | max
---+-----+-----
1 | 0 | 2
2 | 3 | 5
1 | 6 | 7
(3 rows)
Time: 0.456 ms
使用行号差值法将同一用户的连续行归为一组,并在遇到新用户时开始一个新组:按 id 排序的全局行号减去按用户分区后的行号,这个差值在同一段连续行内保持不变。此后,使用 group by
获取每个组的开始和结束。
-- One output row per run of consecutive rows (by id) belonging to the same user,
-- with the first and last id of that run.
-- "user" is double-quoted because unquoted USER is a reserved word in
-- PostgreSQL and evaluates to the current role name instead of the column.
SELECT
    "user",
    MIN(id) AS "start",
    MAX(id) AS "end"
FROM (
    SELECT
        "user",
        id,
        -- Overall row number minus per-user row number stays constant while the
        -- same user continues and changes once another user's rows intervene.
        ROW_NUMBER() OVER (ORDER BY id)
            - ROW_NUMBER() OVER (PARTITION BY "user" ORDER BY id) AS grp
    FROM tablename
) AS t
GROUP BY "user", grp
-- GROUP BY alone guarantees no output order; sort runs by where they begin.
ORDER BY MIN(id);