由于阶段已更改,从当前分区获取 FIRST_VALUE
Get the FIRST_VALUE from the current partition since stage has changed
我想使用 FIRST_VALUE() 函数来获取第一个“updated_at”值。但是我想要的是阶段(stage)发生变化之后的第一个值。
-- Sample deal history, one row per update of a deal.
-- NOTE(review): first_updated_at looks like the desired result (first
-- updated_at of the current run of identical stages) -- confirm with the asker.
with values as (
    -- Column names are taken from the first branch of the union.
    select 1 as deal, 2 as stage, '2020-11-10' as updated_at, '2020-11-10' as first_updated_at
    union all
    select 1, 2, '2020-11-11', '2020-11-10'
    union all
    select 1, 3, '2020-11-12', '2020-11-12'
    union all
    select 1, 4, '2020-11-13', '2020-11-13'
    union all
    select 1, 4, '2020-11-14', '2020-11-13'
    union all
    select 1, 2, '2020-11-15', '2020-11-15'
    union all
    select 1, 2, '2020-11-16', '2020-11-15'
)
select
    deal,
    stage,
    updated_at,
    first_updated_at
from values
我正在尝试像这样使用 FIRST VALUE 函数:
-- Asker's attempt. Partitioning by (deal, stage) puts every row that shares a
-- stage into one partition, even when the runs are separated by other stages,
-- so a later run of the same stage still sees the earliest updated_at.
FIRST_VALUE(updated_at) OVER (PARTITION BY deal, stage ORDER BY updated_at ASC)
我是不是遗漏了什么或者我想要的是不可能的?
提前致谢
您可以使用窗口函数来完成此操作。首先,判断当前行的阶段是否与上一行的阶段不同。然后,对发生变化的那些行的 updated_at 取累积最大值:
-- Sample deal history, one row per update of a deal.
with values as (
    -- Column names are taken from the first branch of the union.
    select 1 as deal, 2 as stage, '2020-11-10' as updated_at, '2020-11-10' as first_updated_at
    union all
    select 1, 2, '2020-11-11', '2020-11-10'
    union all
    select 1, 3, '2020-11-12', '2020-11-12'
    union all
    select 1, 4, '2020-11-13', '2020-11-13'
    union all
    select 1, 4, '2020-11-14', '2020-11-13'
    union all
    select 1, 2, '2020-11-15', '2020-11-15'
    union all
    select 1, 2, '2020-11-16', '2020-11-15'
),
-- Attach the stage of the previous row (by updated_at) within the same deal;
-- prev_stage is null on the first row of each deal.
with_prev_stage as (
    select
        v.*,
        lag(v.stage) over (partition by v.deal order by v.updated_at) as prev_stage
    from values as v
)
-- Keep updated_at only on rows that start a new run of stages, then carry the
-- latest such value forward with a running max over the deal.
select
    s.*,
    max(
        case
            when s.prev_stage is null or s.stage <> s.prev_stage then s.updated_at
        end
    ) over (partition by s.deal order by s.updated_at) as imputed_first_updated_at
from with_prev_stage as s
I am trying to use the FIRST VALUE function ...
考虑以下选项
-- Two-pass approach; the syntax (select * except, if(), ignore nulls, named
-- windows) appears to target BigQuery.
--
-- Inner query: updated_at_on_change keeps updated_at only on rows whose stage
-- differs from the previous row of the same deal. The first row of a deal has
-- no lag() value, so it is compared against stage - 1 and always counts as a
-- change.
--
-- Outer query: rows that are not a change look back over all earlier rows of
-- the same deal (descending order, frame "1 following .. unbounded following")
-- and take the nearest non-null change marker.
--
-- NOTE(review): if `values` already exposes a first_updated_at column (the
-- sample CTE in the question does), the alias below duplicates that name --
-- confirm which column should be kept.
select
    * except (updated_at_on_change),
    ifnull(
        updated_at_on_change,
        -- Must read the change marker rather than updated_at: updated_at is
        -- never null, so "ignore nulls" would have no effect and the previous
        -- row's timestamp would be returned, which is wrong for runs of three
        -- or more rows with the same stage.
        first_value(updated_at_on_change ignore nulls) over win
    ) as first_updated_at
from (
    select
        *,
        if(stage != ifnull(lag(stage) over win, stage - 1), updated_at, null) as updated_at_on_change
    from values
    window win as (partition by deal order by updated_at)
)
window win as (
    partition by deal
    order by updated_at desc
    rows between 1 following and unbounded following
)
# order by updated_at
如果应用于您问题中的示例数据 - 输出为
我想使用 FIRST_VALUE() 函数来获取第一个“updated_at”值。但是我想要的是阶段(stage)发生变化之后的第一个值。
-- Sample deal history, one row per update of a deal.
-- NOTE(review): first_updated_at looks like the desired result (first
-- updated_at of the current run of identical stages) -- confirm with the asker.
with values as (
    -- Column names are taken from the first branch of the union.
    select 1 as deal, 2 as stage, '2020-11-10' as updated_at, '2020-11-10' as first_updated_at
    union all
    select 1, 2, '2020-11-11', '2020-11-10'
    union all
    select 1, 3, '2020-11-12', '2020-11-12'
    union all
    select 1, 4, '2020-11-13', '2020-11-13'
    union all
    select 1, 4, '2020-11-14', '2020-11-13'
    union all
    select 1, 2, '2020-11-15', '2020-11-15'
    union all
    select 1, 2, '2020-11-16', '2020-11-15'
)
select
    deal,
    stage,
    updated_at,
    first_updated_at
from values
我正在尝试像这样使用 FIRST VALUE 函数:
-- Asker's attempt. Partitioning by (deal, stage) puts every row that shares a
-- stage into one partition, even when the runs are separated by other stages,
-- so a later run of the same stage still sees the earliest updated_at.
FIRST_VALUE(updated_at) OVER (PARTITION BY deal, stage ORDER BY updated_at ASC)
我是不是遗漏了什么或者我想要的是不可能的?
提前致谢
您可以使用窗口函数来完成此操作。首先,判断当前行的阶段是否与上一行的阶段不同。然后,对发生变化的那些行的 updated_at 取累积最大值:
-- Sample deal history, one row per update of a deal.
with values as (
    -- Column names are taken from the first branch of the union.
    select 1 as deal, 2 as stage, '2020-11-10' as updated_at, '2020-11-10' as first_updated_at
    union all
    select 1, 2, '2020-11-11', '2020-11-10'
    union all
    select 1, 3, '2020-11-12', '2020-11-12'
    union all
    select 1, 4, '2020-11-13', '2020-11-13'
    union all
    select 1, 4, '2020-11-14', '2020-11-13'
    union all
    select 1, 2, '2020-11-15', '2020-11-15'
    union all
    select 1, 2, '2020-11-16', '2020-11-15'
),
-- Attach the stage of the previous row (by updated_at) within the same deal;
-- prev_stage is null on the first row of each deal.
with_prev_stage as (
    select
        v.*,
        lag(v.stage) over (partition by v.deal order by v.updated_at) as prev_stage
    from values as v
)
-- Keep updated_at only on rows that start a new run of stages, then carry the
-- latest such value forward with a running max over the deal.
select
    s.*,
    max(
        case
            when s.prev_stage is null or s.stage <> s.prev_stage then s.updated_at
        end
    ) over (partition by s.deal order by s.updated_at) as imputed_first_updated_at
from with_prev_stage as s
I am trying to use the FIRST VALUE function ...
考虑以下选项
-- Two-pass approach; the syntax (select * except, if(), ignore nulls, named
-- windows) appears to target BigQuery.
--
-- Inner query: updated_at_on_change keeps updated_at only on rows whose stage
-- differs from the previous row of the same deal. The first row of a deal has
-- no lag() value, so it is compared against stage - 1 and always counts as a
-- change.
--
-- Outer query: rows that are not a change look back over all earlier rows of
-- the same deal (descending order, frame "1 following .. unbounded following")
-- and take the nearest non-null change marker.
--
-- NOTE(review): if `values` already exposes a first_updated_at column (the
-- sample CTE in the question does), the alias below duplicates that name --
-- confirm which column should be kept.
select
    * except (updated_at_on_change),
    ifnull(
        updated_at_on_change,
        -- Must read the change marker rather than updated_at: updated_at is
        -- never null, so "ignore nulls" would have no effect and the previous
        -- row's timestamp would be returned, which is wrong for runs of three
        -- or more rows with the same stage.
        first_value(updated_at_on_change ignore nulls) over win
    ) as first_updated_at
from (
    select
        *,
        if(stage != ifnull(lag(stage) over win, stage - 1), updated_at, null) as updated_at_on_change
    from values
    window win as (partition by deal order by updated_at)
)
window win as (
    partition by deal
    order by updated_at desc
    rows between 1 following and unbounded following
)
# order by updated_at
如果应用于您问题中的示例数据 - 输出为