返回当前行之前的最新记录
Return the latest record which was before the current row
我有一张消息表 messages。每条消息都有时间戳 created_ts、类型 type(purchase 或 support)、user_id 和主键 msg_id。例如:
msg_id | user_id | created_ts | type |
---|---|---|---|
1 | 1 | 1 | purchase |
2 | 1 | 2 | support |
3 | 2 | 3 | purchase |
4 | 2 | 4 | support |
5 | 2 | 5 | support |
我想生成一张支持消息表,其中每一行都包含该支持消息的详细信息,以及最后一次购买的详细信息(如果存在),即在该行支持消息之前发生的、类型为 purchase 的最新一条消息。例如:
support_msg_id | user_id | support_created_ts | type | last_purchase_msg_id | last_purchase_created_ts |
---|---|---|---|---|---|
2 | 1 | 2 | support | 1 | 1 |
4 | 2 | 4 | support | 3 | 3 |
5 | 2 | 5 | support | 3 | 3 |
我试过以下查询:
-- Attempted approach: for every support message, find the msg_id of the most
-- recent earlier purchase with a correlated scalar subquery, then join back
-- to the purchases to pick up that purchase's timestamp.
select *
from (
    select
        s.msg_id as support_msg_id,
        s.user_id,
        s.created_ts as support_created_ts,
        s.type,  -- this comma was missing, which made the query a syntax error
        (
            -- Compare against the outer row's own column: a select-list alias
            -- such as support_created_ts is not portably visible in here.
            -- NOTE(review): the lookup is not restricted to the same user_id;
            -- confirm whether purchases should be matched per user.
            select p.msg_id
            from messages as p
            where p.type = 'purchase'
              and p.created_ts < s.created_ts
            order by p.created_ts desc
            limit 1
        ) as last_purchase_msg_id
    from messages as s
    where s.type = 'support'
) as supports
inner join (
    select
        msg_id as last_purchase_msg_id,
        created_ts as last_purchase_ts
    from messages
    where type = 'purchase'
) as purchases
    on supports.last_purchase_msg_id = purchases.last_purchase_msg_id
然而这太慢了。
解决这个问题的办法是两次使用窗口函数。首先用 count 函数标识出各组的边界,其中每个组以一条 purchase 开头,并包含其后所有的 support;然后用 max 函数在每个组内取出那个(实际上唯一的)非空值。
Dbfiddle 上的示例针对 Postgres,但只用到了标准 SQL,因此我希望它在 Redshift 上也能运行。
-- Sample data from the question, inlined so the query is self-contained.
with messages(msg_id, user_id, created_ts, type) as (values
  (1, 1, 1, 'purchase'),
  (2, 1, 2, 'support'),
  (3, 2, 3, 'purchase'),
  (4, 2, 4, 'support'),
  (5, 2, 5, 'support')
)
-- Step 1: expose the purchase columns only on purchase rows and number the
-- groups. grp is the running count of purchases, so a purchase and the support
-- messages that follow it (up to the next purchase) share one grp value;
-- rows before the first purchase get grp = 0.
-- NOTE(review): groups span all users; add "partition by m.user_id" here (and
-- to the partitions in "lasts") if the last purchase must belong to the same
-- user as the support message.
, precomputed as (
  select m.*
       , case m.type when 'purchase' then m.msg_id end as last_purchase_msg_id
       , case m.type when 'purchase' then m.created_ts end as last_purchase_created_ts
       -- The frame is spelled out because Redshift rejects an aggregate window
       -- function that has ORDER BY but no frame clause. ROWS (instead of the
       -- default RANGE) plus the msg_id tie-breaker keeps the running count
       -- deterministic when several messages share a created_ts.
       , count(case m.type when 'purchase' then 1 end) over (
           order by m.created_ts, m.msg_id
           rows between unbounded preceding and current row
         ) as grp
  from messages m
)
-- Step 2: spread the single non-null purchase value of each group to every
-- row of that group (max ignores the nulls carried by support rows).
, lasts as (
  select g.msg_id, g.user_id, g.created_ts, g.type
       , max(g.last_purchase_msg_id) over (partition by g.grp) as last_purchase_msg_id
       , max(g.last_purchase_created_ts) over (partition by g.grp) as last_purchase_created_ts
  from precomputed g
)
-- Keep only the support messages; the purchase rows were needed just to seed
-- the groups.
select * from lasts
where type = 'support'
-- Every purchase together with the timestamp of the same user's next purchase
-- (nextts is null for a user's most recent purchase).
with purchase_spans as (
    select
        *,
        lead(created_ts) over (partition by user_id order by created_ts) as nextts
    from messages
    where type = 'purchase'
)
-- Attach to each support message the purchase of the same user whose span
-- [created_ts, nextts) contains the support message's timestamp; support
-- messages without such a purchase are kept with null purchase columns.
select *
from messages as sup
left join purchase_spans as pur
    on sup.user_id = pur.user_id
   and pur.created_ts <= sup.created_ts
   and (pur.nextts is null or sup.created_ts < pur.nextts)
where sup.type = 'support'
https://dbfiddle.uk/?rdbms=postgres_10&fiddle=aea646e85dfa35896f8cacfabec439d6
我有一张消息表 messages。每条消息都有时间戳 created_ts、类型 type(purchase 或 support)、user_id 和主键 msg_id。例如:
msg_id | user_id | created_ts | type |
---|---|---|---|
1 | 1 | 1 | purchase |
2 | 1 | 2 | support |
3 | 2 | 3 | purchase |
4 | 2 | 4 | support |
5 | 2 | 5 | support |
我想生成一张支持消息表,其中每一行都包含该支持消息的详细信息,以及最后一次购买的详细信息(如果存在),即在该行支持消息之前发生的、类型为 purchase 的最新一条消息。例如:
support_msg_id | user_id | support_created_ts | type | last_purchase_msg_id | last_purchase_created_ts |
---|---|---|---|---|---|
2 | 1 | 2 | support | 1 | 1 |
4 | 2 | 4 | support | 3 | 3 |
5 | 2 | 5 | support | 3 | 3 |
我试过以下查询:
-- Attempted approach: for every support message, find the msg_id of the most
-- recent earlier purchase with a correlated scalar subquery, then join back
-- to the purchases to pick up that purchase's timestamp.
select *
from (
    select
        s.msg_id as support_msg_id,
        s.user_id,
        s.created_ts as support_created_ts,
        s.type,  -- this comma was missing, which made the query a syntax error
        (
            -- Compare against the outer row's own column: a select-list alias
            -- such as support_created_ts is not portably visible in here.
            -- NOTE(review): the lookup is not restricted to the same user_id;
            -- confirm whether purchases should be matched per user.
            select p.msg_id
            from messages as p
            where p.type = 'purchase'
              and p.created_ts < s.created_ts
            order by p.created_ts desc
            limit 1
        ) as last_purchase_msg_id
    from messages as s
    where s.type = 'support'
) as supports
inner join (
    select
        msg_id as last_purchase_msg_id,
        created_ts as last_purchase_ts
    from messages
    where type = 'purchase'
) as purchases
    on supports.last_purchase_msg_id = purchases.last_purchase_msg_id
然而这太慢了。
解决这个问题的办法是两次使用窗口函数。首先用 count 函数标识出各组的边界,其中每个组以一条 purchase 开头,并包含其后所有的 support;然后用 max 函数在每个组内取出那个(实际上唯一的)非空值。
Dbfiddle 上的示例针对 Postgres,但只用到了标准 SQL,因此我希望它在 Redshift 上也能运行。
-- Sample data from the question, inlined so the query is self-contained.
with messages(msg_id, user_id, created_ts, type) as (values
  (1, 1, 1, 'purchase'),
  (2, 1, 2, 'support'),
  (3, 2, 3, 'purchase'),
  (4, 2, 4, 'support'),
  (5, 2, 5, 'support')
)
-- Step 1: expose the purchase columns only on purchase rows and number the
-- groups. grp is the running count of purchases, so a purchase and the support
-- messages that follow it (up to the next purchase) share one grp value;
-- rows before the first purchase get grp = 0.
-- NOTE(review): groups span all users; add "partition by m.user_id" here (and
-- to the partitions in "lasts") if the last purchase must belong to the same
-- user as the support message.
, precomputed as (
  select m.*
       , case m.type when 'purchase' then m.msg_id end as last_purchase_msg_id
       , case m.type when 'purchase' then m.created_ts end as last_purchase_created_ts
       -- The frame is spelled out because Redshift rejects an aggregate window
       -- function that has ORDER BY but no frame clause. ROWS (instead of the
       -- default RANGE) plus the msg_id tie-breaker keeps the running count
       -- deterministic when several messages share a created_ts.
       , count(case m.type when 'purchase' then 1 end) over (
           order by m.created_ts, m.msg_id
           rows between unbounded preceding and current row
         ) as grp
  from messages m
)
-- Step 2: spread the single non-null purchase value of each group to every
-- row of that group (max ignores the nulls carried by support rows).
, lasts as (
  select g.msg_id, g.user_id, g.created_ts, g.type
       , max(g.last_purchase_msg_id) over (partition by g.grp) as last_purchase_msg_id
       , max(g.last_purchase_created_ts) over (partition by g.grp) as last_purchase_created_ts
  from precomputed g
)
-- Keep only the support messages; the purchase rows were needed just to seed
-- the groups.
select * from lasts
where type = 'support'
-- Every purchase together with the timestamp of the same user's next purchase
-- (nextts is null for a user's most recent purchase).
with purchase_spans as (
    select
        *,
        lead(created_ts) over (partition by user_id order by created_ts) as nextts
    from messages
    where type = 'purchase'
)
-- Attach to each support message the purchase of the same user whose span
-- [created_ts, nextts) contains the support message's timestamp; support
-- messages without such a purchase are kept with null purchase columns.
select *
from messages as sup
left join purchase_spans as pur
    on sup.user_id = pur.user_id
   and pur.created_ts <= sup.created_ts
   and (pur.nextts is null or sup.created_ts < pur.nextts)
where sup.type = 'support'
https://dbfiddle.uk/?rdbms=postgres_10&fiddle=aea646e85dfa35896f8cacfabec439d6