BigQuery - 使用窗口函数的移动平均线
BigQuery - Moving Averages using Windowing Function
我正在尝试创建一个度量,使用窗口函数来计算移动平均值,而不必创建多个公用表表达式(CTE)。为什么当我按照 CTE 中对数据分组的方式对数据进行分区时,窗口函数返回的结果不正确?
-- Compares two ways of computing a 3-row moving average of quantity per day:
-- a grouped CTE chain (CTEDAILY -> CTEMA) versus a single window (w2_ma3) over the raw rows.
-- data: inline sample rows (id, day as a "YYYY-MM-DD" string literal, quantity).
WITH data as (
SELECT
1 as id, "2020-06-20" as day,10 as quantity
UNION ALL SELECT 2,"2020-06-20", 15
UNION ALL SELECT 2, "2020-06-20", 20
UNION ALL SELECT 2, "2020-06-20", 21
UNION ALL SELECT 2, "2020-06-20", 19
UNION ALL SELECT 1,"2020-06-21",5
UNION ALL SELECT 2,"2020-06-21",10
UNION ALL SELECT 2, "2020-06-21",5
UNION ALL SELECT 1,"2020-06-22",9
UNION ALL SELECT 2,"2020-06-22",4
UNION ALL SELECT 2,"2020-06-23",10
UNION ALL SELECT 2,"2020-06-23",10
UNION ALL SELECT 2,"2020-06-23",20
)
-- CTEDAILY: one row per day holding that day's average quantity (sq).
, CTEDAILY AS (
SELECT
AVG(quantity) AS sq,
day
FROM data
GROUP BY day
)
-- CTEMA: average of sq over the current daily row and the 2 preceding daily rows, ordered by day.
, CTEMA AS (
SELECT
day,
AVG(sq) OVER (ORDER BY day ASC ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) ma3_version2
FROM CTEDAILY
)
-- CTECOMP: window statistics computed directly on the raw rows of data (one output row per input row).
, CTECOMP AS (
SELECT
day,
quantity,
SUM(quantity) OVER w1 AS sum_quantity,
AVG(quantity) OVER w1 AS avg_quantity,
PERCENTILE_CONT(quantity, 0.5) OVER w1 AS median,
STDDEV(quantity) OVER w1 AS stdd,
AVG(quantity) OVER w2_ma3 AS ma3_version1, /* not daily, currently returning MA by row? want to first sum then MA3??? */
FROM data
-- w1: all rows that share the same day.
WINDOW w1 AS (PARTITION BY day),
-- w2_ma3: also partitioned by day, so its frame can only contain raw rows of the same day;
-- every row in a partition ties on the ORDER BY key (day), so which rows are the
-- "2 preceding" ones is not determined by this ordering.
w2_ma3 AS (PARTITION BY day ORDER BY day ASC ROWS BETWEEN 2 PRECEDING AND CURRENT ROW)
-- NOTE(review): this ORDER BY sits inside the CTE; the order of the final result is set by the outer ORDER BY.
ORDER BY day ASC
)
-- Collapse CTECOMP to one row per day (ANY_VALUE picks one of the per-row values for that day)
-- and attach the CTE-based moving average for a side-by-side comparison.
SELECT
CTECOMP.day,
ANY_VALUE(CTECOMP.sum_quantity) sum_q,
ANY_VALUE(avg_quantity) AS avg_q,
ANY_VALUE(median) AS median,
ANY_VALUE(stdd) AS stdd,
ANY_VALUE(CTECOMP.ma3_version1) AS ma3_incorrect,
ANY_VALUE(CTEMA.ma3_version2) AS ma3_correct
FROM CTECOMP
INNER JOIN CTEMA
ON CTECOMP.day = CTEMA.day
GROUP BY CTECOMP.day
ORDER BY day ASC;
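- 您的第一个 CTE(CTEDAILY)计算每天的日内平均值。
- 您的第二个 CTE(CTEMA)对这些日内平均值按天计算移动平均值(当前行加前 2 行,共 3 个周期)。
- 您的第三个 CTE(CTECOMP)中的窗口 w2_ma3 使用了 PARTITION BY day,窗口帧因此只包含同一天内的原始行,得到的是同一天内逐行的平均值,而不是跨天的移动平均值。
要得到您指定的“正确”答案,需要先算出每天的日内平均值,再对它计算 3 个周期的移动平均值。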
下面是沿用您的写法、改用子查询实现的版本:
-- Per-day statistics plus a 3-period moving average of the intra-day average quantity.
-- data: inline sample rows (id, day as a "YYYY-MM-DD" string literal, quantity).
WITH data AS (
    SELECT 1 AS id, "2020-06-20" AS day, 10 AS quantity
    UNION ALL SELECT 2, "2020-06-20", 15
    UNION ALL SELECT 2, "2020-06-20", 20
    UNION ALL SELECT 2, "2020-06-20", 21
    UNION ALL SELECT 2, "2020-06-20", 19
    UNION ALL SELECT 1, "2020-06-21", 5
    UNION ALL SELECT 2, "2020-06-21", 10
    UNION ALL SELECT 2, "2020-06-21", 5
    UNION ALL SELECT 1, "2020-06-22", 9
    UNION ALL SELECT 2, "2020-06-22", 4
    UNION ALL SELECT 2, "2020-06-23", 10
    UNION ALL SELECT 2, "2020-06-23", 10
    UNION ALL SELECT 2, "2020-06-23", 20
)
SELECT
    -- Columns are listed explicitly (same names and order the subquery produces)
    -- so the output shape is visible without reading the subquery.
    day,
    sum_daily_quantity,
    avg_intra_day_quantity,
    median,
    stdd,
    -- No PARTITION BY here: the frame spans days (current daily row + 2 preceding daily rows).
    AVG(avg_intra_day_quantity) OVER (
        ORDER BY day ASC
        ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
    ) AS ma3
FROM (
    -- The window functions repeat the same per-day values on every raw row of that day;
    -- DISTINCT collapses them to a single row per day.
    SELECT DISTINCT
        day,
        SUM(quantity) OVER w1 AS sum_daily_quantity,
        AVG(quantity) OVER w1 AS avg_intra_day_quantity,
        PERCENTILE_CONT(quantity, 0.5) OVER w1 AS median,
        STDDEV(quantity) OVER w1 AS stdd
    FROM data
    WINDOW w1 AS (PARTITION BY day)
)
-- Without a final ORDER BY the row order of the result is not guaranteed.
ORDER BY day ASC
下面是结合 GROUP BY 与窗口函数(不使用额外 CTE)的子查询版本:
-- 3-period moving average of the per-day average quantity, using GROUP BY in a subquery
-- and a window function in the outer query.
-- data: inline sample rows (id, day as a "YYYY-MM-DD" string literal, quantity).
WITH data AS (
    SELECT 1 AS id, "2020-06-20" AS day, 10 AS quantity
    UNION ALL SELECT 2, "2020-06-20", 15
    UNION ALL SELECT 2, "2020-06-20", 20
    UNION ALL SELECT 2, "2020-06-20", 21
    UNION ALL SELECT 2, "2020-06-20", 19
    UNION ALL SELECT 1, "2020-06-21", 5
    UNION ALL SELECT 2, "2020-06-21", 10
    UNION ALL SELECT 2, "2020-06-21", 5
    UNION ALL SELECT 1, "2020-06-22", 9
    UNION ALL SELECT 2, "2020-06-22", 4
    UNION ALL SELECT 2, "2020-06-23", 10
    UNION ALL SELECT 2, "2020-06-23", 10
    UNION ALL SELECT 2, "2020-06-23", 20
)
SELECT
    day,
    -- Frame spans days: current daily row + 2 preceding daily rows.
    AVG(sq) OVER (
        ORDER BY day ASC
        ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
    ) AS ma3
FROM (
    -- One row per day with that day's average quantity.
    SELECT
        day,
        AVG(quantity) AS sq
    FROM data
    GROUP BY day
)
-- Without a final ORDER BY the row order of the result is not guaranteed.
ORDER BY day ASC
下面是仅使用窗口函数的子查询版本:
-- 3-period moving average of the per-day average quantity, using window functions only.
-- data: inline sample rows (id, day as a "YYYY-MM-DD" string literal, quantity).
WITH data AS (
    SELECT 1 AS id, "2020-06-20" AS day, 10 AS quantity
    UNION ALL SELECT 2, "2020-06-20", 15
    UNION ALL SELECT 2, "2020-06-20", 20
    UNION ALL SELECT 2, "2020-06-20", 21
    UNION ALL SELECT 2, "2020-06-20", 19
    UNION ALL SELECT 1, "2020-06-21", 5
    UNION ALL SELECT 2, "2020-06-21", 10
    UNION ALL SELECT 2, "2020-06-21", 5
    UNION ALL SELECT 1, "2020-06-22", 9
    UNION ALL SELECT 2, "2020-06-22", 4
    UNION ALL SELECT 2, "2020-06-23", 10
    UNION ALL SELECT 2, "2020-06-23", 10
    UNION ALL SELECT 2, "2020-06-23", 20
)
SELECT
    day,
    -- Frame spans days: current daily row + 2 preceding daily rows.
    AVG(intra_day_avg) OVER (
        ORDER BY day ASC
        ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
    ) AS ma3
FROM (
    -- The partitioned AVG repeats the same value on every raw row of a day;
    -- DISTINCT collapses them to a single row per day.
    SELECT DISTINCT
        day,
        AVG(quantity) OVER (PARTITION BY day) AS intra_day_avg
    FROM data
)
-- Without a final ORDER BY the row order of the result is not guaranteed.
ORDER BY day ASC
附注(仅供参考)
如果您想计算的是每日简单移动平均值,应先按天对数量求和,再对这些每日总和计算 N 个周期的移动平均值,这样得到的才是每日移动平均值。而您目前计算的是日内平均值的平均值。
我正在尝试创建一个度量,使用窗口函数来计算移动平均值,而不必创建多个公用表表达式(CTE)。为什么当我按照 CTE 中对数据分组的方式对数据进行分区时,窗口函数返回的结果不正确?
-- Compares two ways of computing a 3-row moving average of quantity per day:
-- a grouped CTE chain (CTEDAILY -> CTEMA) versus a single window (w2_ma3) over the raw rows.
-- data: inline sample rows (id, day as a "YYYY-MM-DD" string literal, quantity).
WITH data as (
SELECT
1 as id, "2020-06-20" as day,10 as quantity
UNION ALL SELECT 2,"2020-06-20", 15
UNION ALL SELECT 2, "2020-06-20", 20
UNION ALL SELECT 2, "2020-06-20", 21
UNION ALL SELECT 2, "2020-06-20", 19
UNION ALL SELECT 1,"2020-06-21",5
UNION ALL SELECT 2,"2020-06-21",10
UNION ALL SELECT 2, "2020-06-21",5
UNION ALL SELECT 1,"2020-06-22",9
UNION ALL SELECT 2,"2020-06-22",4
UNION ALL SELECT 2,"2020-06-23",10
UNION ALL SELECT 2,"2020-06-23",10
UNION ALL SELECT 2,"2020-06-23",20
)
-- CTEDAILY: one row per day holding that day's average quantity (sq).
, CTEDAILY AS (
SELECT
AVG(quantity) AS sq,
day
FROM data
GROUP BY day
)
-- CTEMA: average of sq over the current daily row and the 2 preceding daily rows, ordered by day.
, CTEMA AS (
SELECT
day,
AVG(sq) OVER (ORDER BY day ASC ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) ma3_version2
FROM CTEDAILY
)
-- CTECOMP: window statistics computed directly on the raw rows of data (one output row per input row).
, CTECOMP AS (
SELECT
day,
quantity,
SUM(quantity) OVER w1 AS sum_quantity,
AVG(quantity) OVER w1 AS avg_quantity,
PERCENTILE_CONT(quantity, 0.5) OVER w1 AS median,
STDDEV(quantity) OVER w1 AS stdd,
AVG(quantity) OVER w2_ma3 AS ma3_version1, /* not daily, currently returning MA by row? want to first sum then MA3??? */
FROM data
-- w1: all rows that share the same day.
WINDOW w1 AS (PARTITION BY day),
-- w2_ma3: also partitioned by day, so its frame can only contain raw rows of the same day;
-- every row in a partition ties on the ORDER BY key (day), so which rows are the
-- "2 preceding" ones is not determined by this ordering.
w2_ma3 AS (PARTITION BY day ORDER BY day ASC ROWS BETWEEN 2 PRECEDING AND CURRENT ROW)
-- NOTE(review): this ORDER BY sits inside the CTE; the order of the final result is set by the outer ORDER BY.
ORDER BY day ASC
)
-- Collapse CTECOMP to one row per day (ANY_VALUE picks one of the per-row values for that day)
-- and attach the CTE-based moving average for a side-by-side comparison.
SELECT
CTECOMP.day,
ANY_VALUE(CTECOMP.sum_quantity) sum_q,
ANY_VALUE(avg_quantity) AS avg_q,
ANY_VALUE(median) AS median,
ANY_VALUE(stdd) AS stdd,
ANY_VALUE(CTECOMP.ma3_version1) AS ma3_incorrect,
ANY_VALUE(CTEMA.ma3_version2) AS ma3_correct
FROM CTECOMP
INNER JOIN CTEMA
ON CTECOMP.day = CTEMA.day
GROUP BY CTECOMP.day
ORDER BY day ASC;
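- 您的第一个 CTE(CTEDAILY)计算每天的日内平均值。
- 您的第二个 CTE(CTEMA)对这些日内平均值按天计算移动平均值(当前行加前 2 行,共 3 个周期)。
- 您的第三个 CTE(CTECOMP)中的窗口 w2_ma3 使用了 PARTITION BY day,窗口帧因此只包含同一天内的原始行,得到的是同一天内逐行的平均值,而不是跨天的移动平均值。
要得到您指定的“正确”答案,需要先算出每天的日内平均值,再对它计算 3 个周期的移动平均值。下面是沿用您的写法、改用子查询实现的版本: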
-- Per-day statistics plus a 3-period moving average of the intra-day average quantity.
-- data: inline sample rows (id, day as a "YYYY-MM-DD" string literal, quantity).
WITH data AS (
    SELECT 1 AS id, "2020-06-20" AS day, 10 AS quantity
    UNION ALL SELECT 2, "2020-06-20", 15
    UNION ALL SELECT 2, "2020-06-20", 20
    UNION ALL SELECT 2, "2020-06-20", 21
    UNION ALL SELECT 2, "2020-06-20", 19
    UNION ALL SELECT 1, "2020-06-21", 5
    UNION ALL SELECT 2, "2020-06-21", 10
    UNION ALL SELECT 2, "2020-06-21", 5
    UNION ALL SELECT 1, "2020-06-22", 9
    UNION ALL SELECT 2, "2020-06-22", 4
    UNION ALL SELECT 2, "2020-06-23", 10
    UNION ALL SELECT 2, "2020-06-23", 10
    UNION ALL SELECT 2, "2020-06-23", 20
)
SELECT
    -- Columns are listed explicitly (same names and order the subquery produces)
    -- so the output shape is visible without reading the subquery.
    day,
    sum_daily_quantity,
    avg_intra_day_quantity,
    median,
    stdd,
    -- No PARTITION BY here: the frame spans days (current daily row + 2 preceding daily rows).
    AVG(avg_intra_day_quantity) OVER (
        ORDER BY day ASC
        ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
    ) AS ma3
FROM (
    -- The window functions repeat the same per-day values on every raw row of that day;
    -- DISTINCT collapses them to a single row per day.
    SELECT DISTINCT
        day,
        SUM(quantity) OVER w1 AS sum_daily_quantity,
        AVG(quantity) OVER w1 AS avg_intra_day_quantity,
        PERCENTILE_CONT(quantity, 0.5) OVER w1 AS median,
        STDDEV(quantity) OVER w1 AS stdd
    FROM data
    WINDOW w1 AS (PARTITION BY day)
)
-- Without a final ORDER BY the row order of the result is not guaranteed.
ORDER BY day ASC
下面是结合 GROUP BY 与窗口函数(不使用额外 CTE)的子查询版本:
-- 3-period moving average of the per-day average quantity, using GROUP BY in a subquery
-- and a window function in the outer query.
-- data: inline sample rows (id, day as a "YYYY-MM-DD" string literal, quantity).
WITH data AS (
    SELECT 1 AS id, "2020-06-20" AS day, 10 AS quantity
    UNION ALL SELECT 2, "2020-06-20", 15
    UNION ALL SELECT 2, "2020-06-20", 20
    UNION ALL SELECT 2, "2020-06-20", 21
    UNION ALL SELECT 2, "2020-06-20", 19
    UNION ALL SELECT 1, "2020-06-21", 5
    UNION ALL SELECT 2, "2020-06-21", 10
    UNION ALL SELECT 2, "2020-06-21", 5
    UNION ALL SELECT 1, "2020-06-22", 9
    UNION ALL SELECT 2, "2020-06-22", 4
    UNION ALL SELECT 2, "2020-06-23", 10
    UNION ALL SELECT 2, "2020-06-23", 10
    UNION ALL SELECT 2, "2020-06-23", 20
)
SELECT
    day,
    -- Frame spans days: current daily row + 2 preceding daily rows.
    AVG(sq) OVER (
        ORDER BY day ASC
        ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
    ) AS ma3
FROM (
    -- One row per day with that day's average quantity.
    SELECT
        day,
        AVG(quantity) AS sq
    FROM data
    GROUP BY day
)
-- Without a final ORDER BY the row order of the result is not guaranteed.
ORDER BY day ASC
下面是仅使用窗口函数的子查询版本:
-- 3-period moving average of the per-day average quantity, using window functions only.
-- data: inline sample rows (id, day as a "YYYY-MM-DD" string literal, quantity).
WITH data AS (
    SELECT 1 AS id, "2020-06-20" AS day, 10 AS quantity
    UNION ALL SELECT 2, "2020-06-20", 15
    UNION ALL SELECT 2, "2020-06-20", 20
    UNION ALL SELECT 2, "2020-06-20", 21
    UNION ALL SELECT 2, "2020-06-20", 19
    UNION ALL SELECT 1, "2020-06-21", 5
    UNION ALL SELECT 2, "2020-06-21", 10
    UNION ALL SELECT 2, "2020-06-21", 5
    UNION ALL SELECT 1, "2020-06-22", 9
    UNION ALL SELECT 2, "2020-06-22", 4
    UNION ALL SELECT 2, "2020-06-23", 10
    UNION ALL SELECT 2, "2020-06-23", 10
    UNION ALL SELECT 2, "2020-06-23", 20
)
SELECT
    day,
    -- Frame spans days: current daily row + 2 preceding daily rows.
    AVG(intra_day_avg) OVER (
        ORDER BY day ASC
        ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
    ) AS ma3
FROM (
    -- The partitioned AVG repeats the same value on every raw row of a day;
    -- DISTINCT collapses them to a single row per day.
    SELECT DISTINCT
        day,
        AVG(quantity) OVER (PARTITION BY day) AS intra_day_avg
    FROM data
)
-- Without a final ORDER BY the row order of the result is not guaranteed.
ORDER BY day ASC
附注(仅供参考)
如果您想计算的是每日简单移动平均值,应先按天对数量求和,再对这些每日总和计算 N 个周期的移动平均值,这样得到的才是每日移动平均值。而您目前计算的是日内平均值的平均值。