如何在 Redshift 中按条件计算不同行中同一列值之间的差值?
How to find the difference between same column values in different rows with condition in redshift?
我有一个聊天室数据集,其中包含用户加入和离开房间的活动。一个用户可以多次加入和离开房间。我需要找出每个用户在每个房间花费的时间。
room_id user_id event time
1 1 join 2021-12-10 09:00:00
1 2 join 2021-12-10 09:10:00
1 3 join 2021-12-10 09:20:00
1 2 leave 2021-12-10 09:30:00
1 1 leave 2021-12-10 09:40:00
1 1 join 2021-12-10 09:50:00
1 3 leave 2021-12-10 10:00:00
1 1 leave 2021-12-10 10:10:00
我需要的是:
room_id user_id time_spend
1 1 60
1 2 20
1 3 40
我是 Redshift 新手。我尝试使用 LAG 函数,但无法加上 join 和 leave 这样的条件。有人可以帮我解决这个问题吗?
下面是基于您的示例数据的一个起点:
-- Sample fixture: one row per join/leave event of a user in a chat room.
create table test (
    room_id    int,
    user_id    int,
    event      varchar(16),  -- 'join' or 'leave' in the sample rows
    event_time timestamp
);
-- Load the sample events from the question. The explicit column list keeps the
-- statement correct even if columns are later added to or reordered in `test`.
insert into test (room_id, user_id, event, event_time) values
(1, 1, 'join', '2021-12-10 09:00:00'),
(1, 2, 'join', '2021-12-10 09:10:00'),
(1, 3, 'join', '2021-12-10 09:20:00'),
(1, 2, 'leave', '2021-12-10 09:30:00'),
(1, 1, 'leave', '2021-12-10 09:40:00'),
(1, 1, 'join', '2021-12-10 09:50:00'),
(1, 3, 'leave', '2021-12-10 10:00:00'),
(1, 1, 'leave', '2021-12-10 10:10:00')
;
-- Inspect the fixture: each user's events in chronological order.
-- Columns are listed explicitly so the output shape does not change silently
-- if the table definition does.
select
    room_id,
    user_id,
    event,
    event_time
from test
order by user_id, event_time;
-- Total minutes each user spent in each room (Amazon Redshift dialect).
-- Every 'join' row is paired with the same user's next 'leave' row in the same
-- room. 'leave' rows get a NULL join_time, so their DATEDIFF is NULL and SUM
-- skips them; a 'join' with no later 'leave' likewise contributes nothing.
with join_leave_pairs as (
    select
        room_id,
        user_id,
        case when event = 'join' then event_time end as join_time,
        -- IGNORE NULLS makes LEAD skip following non-'leave' rows, so it yields
        -- the next 'leave' timestamp after the current row (NULL if none).
        lead(case when event = 'leave' then event_time end) ignore nulls
            over (partition by room_id, user_id order by event_time) as leave_time
    from test
)
select
    room_id,
    user_id,
    sum(datediff(minute, join_time, leave_time)) as time_spent
from join_leave_pairs
group by room_id, user_id
order by room_id, user_id;
我有一个聊天室数据集,其中包含用户加入和离开房间的活动。一个用户可以多次加入和离开房间。我需要找出每个用户在每个房间花费的时间。
room_id user_id event time
1 1 join 2021-12-10 09:00:00
1 2 join 2021-12-10 09:10:00
1 3 join 2021-12-10 09:20:00
1 2 leave 2021-12-10 09:30:00
1 1 leave 2021-12-10 09:40:00
1 1 join 2021-12-10 09:50:00
1 3 leave 2021-12-10 10:00:00
1 1 leave 2021-12-10 10:10:00
我需要的是:
room_id user_id time_spend
1 1 60
1 2 20
1 3 40
我是 Redshift 新手。我尝试使用 LAG 函数,但无法加上 join 和 leave 这样的条件。有人可以帮我解决这个问题吗?
下面是基于您的示例数据的一个起点:
-- Sample fixture: one row per join/leave event of a user in a chat room.
create table test (
    room_id    int,
    user_id    int,
    event      varchar(16),  -- 'join' or 'leave' in the sample rows
    event_time timestamp
);
-- Load the sample events from the question. The explicit column list keeps the
-- statement correct even if columns are later added to or reordered in `test`.
insert into test (room_id, user_id, event, event_time) values
(1, 1, 'join', '2021-12-10 09:00:00'),
(1, 2, 'join', '2021-12-10 09:10:00'),
(1, 3, 'join', '2021-12-10 09:20:00'),
(1, 2, 'leave', '2021-12-10 09:30:00'),
(1, 1, 'leave', '2021-12-10 09:40:00'),
(1, 1, 'join', '2021-12-10 09:50:00'),
(1, 3, 'leave', '2021-12-10 10:00:00'),
(1, 1, 'leave', '2021-12-10 10:10:00')
;
-- Inspect the fixture: each user's events in chronological order.
-- Columns are listed explicitly so the output shape does not change silently
-- if the table definition does.
select
    room_id,
    user_id,
    event,
    event_time
from test
order by user_id, event_time;
-- Total minutes each user spent in each room (Amazon Redshift dialect).
-- Every 'join' row is paired with the same user's next 'leave' row in the same
-- room. 'leave' rows get a NULL join_time, so their DATEDIFF is NULL and SUM
-- skips them; a 'join' with no later 'leave' likewise contributes nothing.
with join_leave_pairs as (
    select
        room_id,
        user_id,
        case when event = 'join' then event_time end as join_time,
        -- IGNORE NULLS makes LEAD skip following non-'leave' rows, so it yields
        -- the next 'leave' timestamp after the current row (NULL if none).
        lead(case when event = 'leave' then event_time end) ignore nulls
            over (partition by room_id, user_id order by event_time) as leave_time
    from test
)
select
    room_id,
    user_id,
    sum(datediff(minute, join_time, leave_time)) as time_spent
from join_leave_pairs
group by room_id, user_id
order by room_id, user_id;