使用 bigquery 从移动 window 中查找前一天、前一天的最大值
Find max value of previous day, second previous day from moving window using bigquery
我有一个要求求前一天的最大值(val_prev_day),前一天的最大值(val_sec_prev_day
), 前第三天 (val_third_prev_day) 从对应的原始值列表 (val_raw) 中获取每一天,如下例所示。
我假设数据集模式,但基本上你可以 shift days
在 subqueries
中做 LEFT OUTER JOIN
.
WITH
dataset AS (
SELECT 1 as day, 0 as val_raw,
UNION ALL SELECT 1 as day, 10 as val_raw,
UNION ALL SELECT 1 as day, 110 as val_raw,
UNION ALL SELECT 2 as day, 30 as val_raw,
UNION ALL SELECT 2 as day, 70 as val_raw,
UNION ALL SELECT 2 as day, 90 as val_raw,
UNION ALL SELECT 3 as day, 0 as val_raw,
UNION ALL SELECT 3 as day, 5 as val_raw,
UNION ALL SELECT 3 as day, 5 as val_raw,
UNION ALL SELECT 4 as day, 0 as val_raw,
UNION ALL SELECT 4 as day, 5 as val_raw,
UNION ALL SELECT 4 as day, 10 as val_raw,
),
shift_one_day_max AS (
SELECT day + 1 as day, max(val_raw) as max_val,
FROM dataset
GROUP BY day
),
shift_two_day_max AS (
SELECT day + 2 as day, max(val_raw) as max_val,
FROM dataset
GROUP BY day
)
SELECT *
FROM dataset
LEFT OUTER JOIN shift_one_day_max USING (day)
LEFT OUTER JOIN shift_two_day_max USING (day)
;
c.f。我省略了 event_ts
,因为它不影响操作。
考虑以下方法
select day, val_raw,
max(val_raw) over(prev_days range between 1 preceding and 1 preceding) val_prev_day,
max(val_raw) over(prev_days range between 2 preceding and 2 preceding) val_sec_prev_day,
max(val_raw) over(prev_days range between 3 preceding and 3 preceding) val_third_prev_day,
from your_table
window prev_days as (order by unix_date(day))
如果应用于您问题中的示例数据 - 输出为
P.S。我使用下面的虚拟数据来测试上面的
with your_table as (
select date '2022-01-01' as day, 0 as val_raw, union all
select '2022-01-01', 10, union all
select '2022-01-01', 110, union all
select '2022-01-02', 30, union all
select '2022-01-02', 70, union all
select '2022-01-02', 90, union all
select '2022-01-03', 0, union all
select '2022-01-03', 5, union all
select '2022-01-03', 5, union all
select '2022-01-04', 0, union all
select '2022-01-04', 5, union all
select '2022-01-04', 10
)
我有一个要求求前一天的最大值(val_prev_day),前一天的最大值(val_sec_prev_day ), 前第三天 (val_third_prev_day) 从对应的原始值列表 (val_raw) 中获取每一天,如下例所示。
我假设数据集模式,但基本上你可以 shift days
在 subqueries
中做 LEFT OUTER JOIN
.
WITH
dataset AS (
SELECT 1 as day, 0 as val_raw,
UNION ALL SELECT 1 as day, 10 as val_raw,
UNION ALL SELECT 1 as day, 110 as val_raw,
UNION ALL SELECT 2 as day, 30 as val_raw,
UNION ALL SELECT 2 as day, 70 as val_raw,
UNION ALL SELECT 2 as day, 90 as val_raw,
UNION ALL SELECT 3 as day, 0 as val_raw,
UNION ALL SELECT 3 as day, 5 as val_raw,
UNION ALL SELECT 3 as day, 5 as val_raw,
UNION ALL SELECT 4 as day, 0 as val_raw,
UNION ALL SELECT 4 as day, 5 as val_raw,
UNION ALL SELECT 4 as day, 10 as val_raw,
),
shift_one_day_max AS (
SELECT day + 1 as day, max(val_raw) as max_val,
FROM dataset
GROUP BY day
),
shift_two_day_max AS (
SELECT day + 2 as day, max(val_raw) as max_val,
FROM dataset
GROUP BY day
)
SELECT *
FROM dataset
LEFT OUTER JOIN shift_one_day_max USING (day)
LEFT OUTER JOIN shift_two_day_max USING (day)
;
c.f。我省略了 event_ts
,因为它不影响操作。
考虑以下方法
select day, val_raw,
max(val_raw) over(prev_days range between 1 preceding and 1 preceding) val_prev_day,
max(val_raw) over(prev_days range between 2 preceding and 2 preceding) val_sec_prev_day,
max(val_raw) over(prev_days range between 3 preceding and 3 preceding) val_third_prev_day,
from your_table
window prev_days as (order by unix_date(day))
如果应用于您问题中的示例数据 - 输出为
P.S。我使用下面的虚拟数据来测试上面的
with your_table as (
select date '2022-01-01' as day, 0 as val_raw, union all
select '2022-01-01', 10, union all
select '2022-01-01', 110, union all
select '2022-01-02', 30, union all
select '2022-01-02', 70, union all
select '2022-01-02', 90, union all
select '2022-01-03', 0, union all
select '2022-01-03', 5, union all
select '2022-01-03', 5, union all
select '2022-01-04', 0, union all
select '2022-01-04', 5, union all
select '2022-01-04', 10
)