Postgres 聚合
Postgres Aggregations
我有一张 Postgres 表,如下所示:
Username | Event | Date |
---|---|---|
UserA | Log in | 02/03/2020 07:06:30 |
UserA | Log in | 02/03/2020 10:15:15 |
UserA | Log in | 02/03/2020 10:17:01 |
UserA | Log Out | 02/03/2020 10:28:55 |
UserA | Log in | 02/07/2019 14:56:15 |
UserA | Log in | 02/08/2019 10:50:34 |
UserA | Log Out | 02/08/2019 10:57:21 |
我希望得到的结果表如下:
Username | log_in_Event | log_in_Date | log_out_event | log_out_date |
---|---|---|---|---|
UserA | Log in | 02/03/2020 07:06:30 | null | null |
UserA | Log in | 02/03/2020 10:15:15 | null | null |
UserA | Log in | 02/03/2020 10:17:01 | Log Out | 02/03/2020 10:28:55 |
UserA | Log in | 02/07/2019 14:56:15 | null | null |
UserA | Log in | 02/08/2019 10:50:34 | Log Out | 02/08/2019 10:57:21 |
我已经尝试过的查询如下:
-- Asker's attempt: split log_activities into login rows and logout rows,
-- then left-join them on same user, same calendar day, same IP address,
-- with the login strictly earlier than the logout.
-- Note: every login is matched with EVERY later logout that satisfies the
-- join condition, not only the one that immediately follows it.
with log_in as (
    select "User Name", "Event", "Date", "IP Address"
    from log_activities
    where "Event" = 'User Logged In'
),
log_out as (
    select "User Name", "Event", "Date", "IP Address"
    from log_activities
    where "Event" = 'User Logged Out'
)
select
    log_in.*,
    log_out.*
from log_in
left join log_out
    on  log_out."User Name" = log_in."User Name"
    and TO_DATE(log_out."Date", 'DD/MM/YYYY') = TO_DATE(log_in."Date", 'DD/MM/YYYY')
    and log_out."Date" > log_in."Date"
    and log_out."IP Address" = log_in."IP Address"
这看起来像是一个“间隙与孤岛”(gaps-and-islands)问题。我建议用窗口函数对登录事件做累计计数来划分分组,然后再按分组聚合:
-- Gaps-and-islands: a running count of 'Log in' events per user assigns
-- every row to the group opened by the most recent login, so each group is
-- one login followed by zero or more logouts. Aggregating per group yields
-- one output row per login.
select
    username,
    'Log in' as log_in_event,
    -- Only login rows may supply the login timestamp.
    min(date) filter (where event = 'Log in') as log_in_date,
    -- Both logout columns are NULL when the group contains no 'Log Out' row.
    max(event) filter (where event = 'Log Out') as log_out_event,
    max(date) filter (where event = 'Log Out') as log_out_date
from (
    select
        la.*,
        count(*) filter (where event = 'Log in')
            over (partition by username order by date) as grp
    from log_activities la
) la
-- grp = 0 means the row precedes the user's first login (e.g. an orphan
-- 'Log Out'); without this filter such rows would surface as a fake login.
where grp > 0
group by username, grp;
我对这个需求的理解是:“我想要所有的登录事件;如果紧随其后的下一个事件是注销,则同时带上这条注销记录。”如果是这样,lead() 似乎是最合适的方法:
-- One row per login. The logout columns are filled only when the event that
-- immediately follows the login (per user, ordered by date) is a 'Log Out';
-- otherwise they are NULL.
select
    la.username,
    la.event as login_event,
    la.date as login_date,
    -- Without this guard a following 'Log in' would be reported as a logout.
    case when la.next_event = 'Log Out' then la.next_event end as logout_event,
    case when la.next_event = 'Log Out' then la.next_date end as logout_date
from (
    select
        la.*,
        lead(event) over w as next_event,
        lead(date) over w as next_date
    from log_activities la
    window w as (partition by username order by date)
) la
where la.event = 'Log in';
您可以使用 lead 分析函数并配合 CASE..WHEN,如下:
-- Attach to every event the event and date that follow it for the same user.
with events_with_next as (
    select
        src.*,
        lead(src.event) over per_user as lead_event,
        lead(src.date) over per_user as lead_date
    from log_activities src
    window per_user as (partition by src.username order by src.date)
)
-- Keep the logins; show the following event only when it is a logout.
select
    e.username,
    e.event as login_event,
    e.date as login_date,
    case e.lead_event when 'Log Out' then e.lead_event end as logout_event,
    case e.lead_event when 'Log Out' then e.lead_date end as logout_date
from events_with_next e
where e.event = 'Log in';
我有一张 Postgres 表,如下所示:
Username | Event | Date |
---|---|---|
UserA | Log in | 02/03/2020 07:06:30 |
UserA | Log in | 02/03/2020 10:15:15 |
UserA | Log in | 02/03/2020 10:17:01 |
UserA | Log Out | 02/03/2020 10:28:55 |
UserA | Log in | 02/07/2019 14:56:15 |
UserA | Log in | 02/08/2019 10:50:34 |
UserA | Log Out | 02/08/2019 10:57:21 |
我希望得到的结果表如下:
Username | log_in_Event | log_in_Date | log_out_event | log_out_date |
---|---|---|---|---|
UserA | Log in | 02/03/2020 07:06:30 | null | null |
UserA | Log in | 02/03/2020 10:15:15 | null | null |
UserA | Log in | 02/03/2020 10:17:01 | Log Out | 02/03/2020 10:28:55 |
UserA | Log in | 02/07/2019 14:56:15 | null | null |
UserA | Log in | 02/08/2019 10:50:34 | Log Out | 02/08/2019 10:57:21 |
我已经尝试过的查询如下:
-- Asker's attempt: split log_activities into login rows and logout rows,
-- then left-join them on same user, same calendar day, same IP address,
-- with the login strictly earlier than the logout.
-- Note: every login is matched with EVERY later logout that satisfies the
-- join condition, not only the one that immediately follows it.
with log_in as (
    select "User Name", "Event", "Date", "IP Address"
    from log_activities
    where "Event" = 'User Logged In'
),
log_out as (
    select "User Name", "Event", "Date", "IP Address"
    from log_activities
    where "Event" = 'User Logged Out'
)
select
    log_in.*,
    log_out.*
from log_in
left join log_out
    on  log_out."User Name" = log_in."User Name"
    and TO_DATE(log_out."Date", 'DD/MM/YYYY') = TO_DATE(log_in."Date", 'DD/MM/YYYY')
    and log_out."Date" > log_in."Date"
    and log_out."IP Address" = log_in."IP Address"
这看起来像是一个“间隙与孤岛”(gaps-and-islands)问题。我建议用窗口函数对登录事件做累计计数来划分分组,然后再按分组聚合:
-- Gaps-and-islands: a running count of 'Log in' events per user assigns
-- every row to the group opened by the most recent login, so each group is
-- one login followed by zero or more logouts. Aggregating per group yields
-- one output row per login.
select
    username,
    'Log in' as log_in_event,
    -- Only login rows may supply the login timestamp.
    min(date) filter (where event = 'Log in') as log_in_date,
    -- Both logout columns are NULL when the group contains no 'Log Out' row.
    max(event) filter (where event = 'Log Out') as log_out_event,
    max(date) filter (where event = 'Log Out') as log_out_date
from (
    select
        la.*,
        count(*) filter (where event = 'Log in')
            over (partition by username order by date) as grp
    from log_activities la
) la
-- grp = 0 means the row precedes the user's first login (e.g. an orphan
-- 'Log Out'); without this filter such rows would surface as a fake login.
where grp > 0
group by username, grp;
我对这个需求的理解是:“我想要所有的登录事件;如果紧随其后的下一个事件是注销,则同时带上这条注销记录。”如果是这样,lead() 似乎是最合适的方法:
-- One row per login. The logout columns are filled only when the event that
-- immediately follows the login (per user, ordered by date) is a 'Log Out';
-- otherwise they are NULL.
select
    la.username,
    la.event as login_event,
    la.date as login_date,
    -- Without this guard a following 'Log in' would be reported as a logout.
    case when la.next_event = 'Log Out' then la.next_event end as logout_event,
    case when la.next_event = 'Log Out' then la.next_date end as logout_date
from (
    select
        la.*,
        lead(event) over w as next_event,
        lead(date) over w as next_date
    from log_activities la
    window w as (partition by username order by date)
) la
where la.event = 'Log in';
您可以使用 lead 分析函数并配合 CASE..WHEN,如下:
-- Attach to every event the event and date that follow it for the same user.
with events_with_next as (
    select
        src.*,
        lead(src.event) over per_user as lead_event,
        lead(src.date) over per_user as lead_date
    from log_activities src
    window per_user as (partition by src.username order by src.date)
)
-- Keep the logins; show the following event only when it is a logout.
select
    e.username,
    e.event as login_event,
    e.date as login_date,
    case e.lead_event when 'Log Out' then e.lead_event end as logout_event,
    case e.lead_event when 'Log Out' then e.lead_date end as logout_date
from events_with_next e
where e.event = 'Log in';