为每一行附上该值最近一次更改时的时间戳
Attach the most recent timestamp when value was changed to each row
我有更新属性的数据集:
user_id | flag | updated_at |
---|---|---|
123 | 1 | 101 |
123 | 0 | 102 |
123 | 1 | 103 |
123 | 1 | 104 |
123 | 1 | 105 |
124 | 0 | 101 |
124 | 0 | 102 |
124 | 0 | 110 |
我需要为每一行附上 flag 值最近一次发生更改的时间,因此结果应该如下所示:
user_id | flag | updated_at | recent_updated_at |
---|---|---|---|
123 | 1 | 101 | 101 |
123 | 0 | 102 | 102 |
123 | 1 | 103 | 103 |
123 | 1 | 104 | 103 |
123 | 1 | 105 | 103 |
124 | 0 | 101 | 101 |
124 | 0 | 102 | 101 |
124 | 0 | 110 | 101 |
到目前为止我有这个查询,但现在感觉卡住了:
-- Asker's attempt: number the rows per (user_id, flag) and take the first
-- updated_at of each such partition.
-- raw_dataset: inline sample rows (user_id, flag, updated_at).
with raw_dataset as (
SELECT 123 as user_id, 1 as flag, 101 as updated_at
UNION ALL SELECT 123 as user_id, 0 as flag, 102 as updated_at
UNION ALL SELECT 123 as user_id, 1 as flag, 103 as updated_at
UNION ALL SELECT 123 as user_id, 0 as flag, 104 as updated_at
UNION ALL SELECT 123 as user_id, 0 as flag, 105 as updated_at
UNION ALL SELECT 124 as user_id, 1 as flag, 101 as updated_at
UNION ALL SELECT 124 as user_id, 1 as flag, 103 as updated_at
UNION ALL SELECT 124 as user_id, 1 as flag, 110 as updated_at
)
-- dataset: raw_dataset plus rn.
-- NOTE(review): the inner derived table has no alias; some engines reject
-- that (e.g. PostgreSQL before 16, MySQL, SQL Server) -- confirm the target engine.
, dataset as (
select * from (
select
*,
-- lag(flag, 1, -1) over (partition by user_id order by updated_at ASC) as lag_,
-- rn counts rows within each (user_id, flag) pair in ascending updated_at
-- order; rows with the same flag share one partition even when rows with a
-- different flag lie between them.
row_number() over (partition by user_id, flag order by updated_at ASC) as rn
from raw_dataset
)
)
-- first_value over the same (user_id, flag) partition yields the earliest
-- updated_at at which the user ever had that flag, not the start of the
-- current run of that flag (e.g. user 123 / flag 0 at 104 and 105 gets 102).
-- The computed column has no alias, so its output name is engine-generated.
SELECT *, first_value(updated_at) over (partition by user_id,flag order by rn ASC) FROM dataset
ORDER BY user_id, updated_at ASC
有什么想法、提示吗?
提前致谢!
这是你需要的吗?
-- Gaps-and-islands: attach to every row the updated_at at which its current
-- run of identical flag values started.
-- raw_dataset: inline sample rows (user_id, flag, updated_at).
with raw_dataset as (
    SELECT 123 as user_id, 1 as flag, 101 as updated_at
    UNION ALL SELECT 123 as user_id, 0 as flag, 102 as updated_at
    UNION ALL SELECT 123 as user_id, 1 as flag, 103 as updated_at
    UNION ALL SELECT 123 as user_id, 0 as flag, 104 as updated_at
    UNION ALL SELECT 123 as user_id, 0 as flag, 105 as updated_at
    UNION ALL SELECT 124 as user_id, 1 as flag, 101 as updated_at
    UNION ALL SELECT 124 as user_id, 1 as flag, 103 as updated_at
    UNION ALL SELECT 124 as user_id, 1 as flag, 110 as updated_at
)
-- dataset: adds grp, an identifier for each run of consecutive equal flags.
, dataset as (
    select
        user_id,
        flag,
        updated_at,
        -- The per-user row number minus the per-(user, flag) row number stays
        -- constant while the flag repeats and shifts once another flag
        -- value has intervened, so (user_id, flag, grp) identifies one run.
        row_number() over (partition by user_id order by updated_at ASC)
        - row_number() over (partition by user_id, flag order by updated_at ASC) as grp
    from raw_dataset
)
-- The smallest updated_at inside a run is the moment the flag last changed.
-- The result column is aliased so its name does not depend on the engine.
SELECT
    user_id,
    flag,
    updated_at,
    grp,
    min(updated_at) over (partition by user_id, flag, grp) as recent_updated_at
FROM dataset
ORDER BY user_id, updated_at ASC
您似乎想要 flag 值最近一次更改的时间。您的问题有些令人困惑,因为问题中的数据与代码中的数据不同。
不过,下面的查询使用问题中的数据,并返回您想要的结果:
-- For each user, carry forward the updated_at of the latest row whose flag
-- differs from the flag of the row immediately before it.
-- raw_dataset: inline sample rows (user_id, flag, updated_at).
WITH raw_dataset AS (
    SELECT 123 AS user_id, 1 AS flag, 101 AS updated_at
    UNION ALL SELECT 123, 0, 102
    UNION ALL SELECT 123, 1, 103
    UNION ALL SELECT 123, 1, 104
    UNION ALL SELECT 123, 1, 105
    UNION ALL SELECT 124, 0, 101
    UNION ALL SELECT 124, 0, 103
    UNION ALL SELECT 124, 0, 110
),
-- with_prev: every row plus the flag of the same user's preceding row
-- (NULL on the user's first row).
with_prev AS (
    SELECT
        src.*,
        LAG(flag) OVER (PARTITION BY user_id ORDER BY updated_at) AS prev_flag
    FROM raw_dataset src
)
SELECT
    wp.*,
    -- Only rows where the flag changed (or the user's first row) contribute a
    -- timestamp; the running MAX then repeats it until the next change.
    MAX(
        CASE
            WHEN prev_flag IS NULL OR prev_flag <> flag THEN updated_at
        END
    ) OVER (
        PARTITION BY user_id
        ORDER BY updated_at
    ) AS most_recent_updated_at
FROM with_prev wp;
逻辑很简单:先查看前一行的 flag 值,然后对 flag 发生变化时的 updated_at 做累积 max()。
这里是一个 db<>fiddle。
我有更新属性的数据集:
user_id | flag | updated_at |
---|---|---|
123 | 1 | 101 |
123 | 0 | 102 |
123 | 1 | 103 |
123 | 1 | 104 |
123 | 1 | 105 |
124 | 0 | 101 |
124 | 0 | 102 |
124 | 0 | 110 |
我需要为每一行附上 flag 值最近一次发生更改的时间,因此结果应该如下所示:
user_id | flag | updated_at | recent_updated_at |
---|---|---|---|
123 | 1 | 101 | 101 |
123 | 0 | 102 | 102 |
123 | 1 | 103 | 103 |
123 | 1 | 104 | 103 |
123 | 1 | 105 | 103 |
124 | 0 | 101 | 101 |
124 | 0 | 102 | 101 |
124 | 0 | 110 | 101 |
到目前为止我有这个查询,但现在感觉卡住了:
-- Asker's attempt: number the rows per (user_id, flag) and take the first
-- updated_at of each such partition.
-- raw_dataset: inline sample rows (user_id, flag, updated_at).
with raw_dataset as (
SELECT 123 as user_id, 1 as flag, 101 as updated_at
UNION ALL SELECT 123 as user_id, 0 as flag, 102 as updated_at
UNION ALL SELECT 123 as user_id, 1 as flag, 103 as updated_at
UNION ALL SELECT 123 as user_id, 0 as flag, 104 as updated_at
UNION ALL SELECT 123 as user_id, 0 as flag, 105 as updated_at
UNION ALL SELECT 124 as user_id, 1 as flag, 101 as updated_at
UNION ALL SELECT 124 as user_id, 1 as flag, 103 as updated_at
UNION ALL SELECT 124 as user_id, 1 as flag, 110 as updated_at
)
-- dataset: raw_dataset plus rn.
-- NOTE(review): the inner derived table has no alias; some engines reject
-- that (e.g. PostgreSQL before 16, MySQL, SQL Server) -- confirm the target engine.
, dataset as (
select * from (
select
*,
-- lag(flag, 1, -1) over (partition by user_id order by updated_at ASC) as lag_,
-- rn counts rows within each (user_id, flag) pair in ascending updated_at
-- order; rows with the same flag share one partition even when rows with a
-- different flag lie between them.
row_number() over (partition by user_id, flag order by updated_at ASC) as rn
from raw_dataset
)
)
-- first_value over the same (user_id, flag) partition yields the earliest
-- updated_at at which the user ever had that flag, not the start of the
-- current run of that flag (e.g. user 123 / flag 0 at 104 and 105 gets 102).
-- The computed column has no alias, so its output name is engine-generated.
SELECT *, first_value(updated_at) over (partition by user_id,flag order by rn ASC) FROM dataset
ORDER BY user_id, updated_at ASC
有什么想法、提示吗? 提前致谢!
这是你需要的吗?
-- Gaps-and-islands: attach to every row the updated_at at which its current
-- run of identical flag values started.
-- raw_dataset: inline sample rows (user_id, flag, updated_at).
with raw_dataset as (
    SELECT 123 as user_id, 1 as flag, 101 as updated_at
    UNION ALL SELECT 123 as user_id, 0 as flag, 102 as updated_at
    UNION ALL SELECT 123 as user_id, 1 as flag, 103 as updated_at
    UNION ALL SELECT 123 as user_id, 0 as flag, 104 as updated_at
    UNION ALL SELECT 123 as user_id, 0 as flag, 105 as updated_at
    UNION ALL SELECT 124 as user_id, 1 as flag, 101 as updated_at
    UNION ALL SELECT 124 as user_id, 1 as flag, 103 as updated_at
    UNION ALL SELECT 124 as user_id, 1 as flag, 110 as updated_at
)
-- dataset: adds grp, an identifier for each run of consecutive equal flags.
, dataset as (
    select
        user_id,
        flag,
        updated_at,
        -- The per-user row number minus the per-(user, flag) row number stays
        -- constant while the flag repeats and shifts once another flag
        -- value has intervened, so (user_id, flag, grp) identifies one run.
        row_number() over (partition by user_id order by updated_at ASC)
        - row_number() over (partition by user_id, flag order by updated_at ASC) as grp
    from raw_dataset
)
-- The smallest updated_at inside a run is the moment the flag last changed.
-- The result column is aliased so its name does not depend on the engine.
SELECT
    user_id,
    flag,
    updated_at,
    grp,
    min(updated_at) over (partition by user_id, flag, grp) as recent_updated_at
FROM dataset
ORDER BY user_id, updated_at ASC
您似乎想要 flag 值最近一次更改的时间。您的问题有些令人困惑,因为问题中的数据与代码中的数据不同。
不过,下面的查询使用问题中的数据,并返回您想要的结果:
-- For each user, carry forward the updated_at of the latest row whose flag
-- differs from the flag of the row immediately before it.
-- raw_dataset: inline sample rows (user_id, flag, updated_at).
WITH raw_dataset AS (
    SELECT 123 AS user_id, 1 AS flag, 101 AS updated_at
    UNION ALL SELECT 123, 0, 102
    UNION ALL SELECT 123, 1, 103
    UNION ALL SELECT 123, 1, 104
    UNION ALL SELECT 123, 1, 105
    UNION ALL SELECT 124, 0, 101
    UNION ALL SELECT 124, 0, 103
    UNION ALL SELECT 124, 0, 110
),
-- with_prev: every row plus the flag of the same user's preceding row
-- (NULL on the user's first row).
with_prev AS (
    SELECT
        src.*,
        LAG(flag) OVER (PARTITION BY user_id ORDER BY updated_at) AS prev_flag
    FROM raw_dataset src
)
SELECT
    wp.*,
    -- Only rows where the flag changed (or the user's first row) contribute a
    -- timestamp; the running MAX then repeats it until the next change.
    MAX(
        CASE
            WHEN prev_flag IS NULL OR prev_flag <> flag THEN updated_at
        END
    ) OVER (
        PARTITION BY user_id
        ORDER BY updated_at
    ) AS most_recent_updated_at
FROM with_prev wp;
逻辑很简单:先查看前一行的 flag 值,然后对 flag 发生变化时的 updated_at 做累积 max()。
这里是一个 db<>fiddle。