BigQuery - 使用窗口函数计算移动平均值

BigQuery - Moving Averages using Windowing Function

我正在尝试创建一个度量,使用窗口函数来计算移动平均值,而不是创建多个公用表表达式(CTE)。为什么当我按照 CTE 中对数据分组的方式对数据进行分区时,窗口函数会返回不正确的结果?

-- Inline sample data: one row per observation (id, day, quantity).
-- `day` is a YYYY-MM-DD string literal, not a DATE value.
WITH data as (
  SELECT
  1 as id, "2020-06-20" as day,10 as quantity
  UNION ALL SELECT 2,"2020-06-20", 15
  UNION ALL SELECT 2, "2020-06-20", 20
  UNION ALL SELECT 2, "2020-06-20", 21
  UNION ALL SELECT 2, "2020-06-20", 19

  UNION ALL SELECT 1,"2020-06-21",5
  UNION ALL SELECT 2,"2020-06-21",10
  UNION ALL SELECT 2, "2020-06-21",5

  UNION ALL SELECT 1,"2020-06-22",9
  UNION ALL SELECT 2,"2020-06-22",4

  UNION ALL SELECT 2,"2020-06-23",10
  UNION ALL SELECT 2,"2020-06-23",10
  UNION ALL SELECT 2,"2020-06-23",20
)
-- Reference calculation, step 1: collapse the raw rows to one row per day
-- holding that day's average quantity (sq).
, CTEDAILY AS (
  SELECT 
    AVG(quantity) AS sq,
    day
  FROM data 
  GROUP BY day 
)
-- Reference calculation, step 2: moving average over the daily averages.
-- The frame is the current daily row plus the two preceding daily rows,
-- ordered by day, so each input row here already represents a whole day.
, CTEMA AS (
  SELECT 
    day,
    AVG(sq) OVER (ORDER BY day ASC ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) ma3_version2
  FROM CTEDAILY 
)

-- Single-pass attempt: per-day statistics and a moving average computed
-- directly on the raw rows through named windows. The output keeps one row
-- per raw input row; it is collapsed to one row per day in the final SELECT.
, CTECOMP AS (
SELECT  
  day,
  quantity,
  -- w1 statistics are identical on every row of the same day.
  SUM(quantity) OVER w1 AS sum_quantity,
  AVG(quantity) OVER w1 AS avg_quantity,
  PERCENTILE_CONT(quantity, 0.5) OVER w1 AS median,
  STDDEV(quantity) OVER w1 AS stdd,

  -- w2_ma3 is partitioned by day, so its frame can only reach rows of the
  -- same day: this averages up to three raw quantities within one day, not
  -- three daily averages. That is why it disagrees with ma3_version2.
  AVG(quantity) OVER w2_ma3 AS ma3_version1, /* not daily, currently returning MA by row? want to first sum then MA3??? */
FROM data  
-- w1: every row that shares the current row's day.
WINDOW w1 AS (PARTITION BY day),
-- w2_ma3: current row plus two preceding rows inside the same day partition.
-- All rows of a partition have the same day, so ORDER BY day does not
-- establish a meaningful order among them.
w2_ma3 AS (PARTITION BY day ORDER BY day ASC ROWS BETWEEN 2 PRECEDING AND CURRENT ROW)
-- NOTE(review): ordering inside a CTE is presumably not relied on by the
-- outer query, which applies its own ORDER BY - confirm.
ORDER BY day ASC
)
-- Final comparison: one row per day with both moving averages side by side.
-- ANY_VALUE picks one value per day from the row-level CTECOMP columns. That
-- is harmless for the w1 columns (constant within a day), but ma3_version1
-- varies from row to row, so the value reported for it is arbitrary.
SELECT 
  CTECOMP.day, 
  ANY_VALUE(CTECOMP.sum_quantity) sum_q, 
  ANY_VALUE(avg_quantity) AS avg_q, 
  ANY_VALUE(median) AS median, 
  ANY_VALUE(stdd) AS stdd,
  ANY_VALUE(CTECOMP.ma3_version1) AS ma3_incorrect,
  ANY_VALUE(CTEMA.ma3_version2) AS ma3_correct
FROM CTECOMP
INNER JOIN CTEMA 
  ON CTECOMP.day = CTEMA.day 
GROUP BY CTECOMP.day
ORDER BY day ASC;

  1. 您的第一个 CTE 取日内平均值。
  2. 您的第二个 CTE 取 X 天内的日内平均值的移动平均值
  3. 您的第三个 CTE 及其窗口函数计算的是日内(同一天内各行之间)的 3 期移动平均值,而不是跨天的移动平均值

以下查询可以得到您所指定的“正确”答案。这是沿用您的写法、改用子查询的版本:

-- Sample observations: one row per (id, day, quantity); day is a
-- YYYY-MM-DD string literal.
WITH data AS (
  SELECT 1 AS id, "2020-06-20" AS day, 10 AS quantity
  UNION ALL SELECT 2, "2020-06-20", 15
  UNION ALL SELECT 2, "2020-06-20", 20
  UNION ALL SELECT 2, "2020-06-20", 21
  UNION ALL SELECT 2, "2020-06-20", 19

  UNION ALL SELECT 1, "2020-06-21", 5
  UNION ALL SELECT 2, "2020-06-21", 10
  UNION ALL SELECT 2, "2020-06-21", 5

  UNION ALL SELECT 1, "2020-06-22", 9
  UNION ALL SELECT 2, "2020-06-22", 4

  UNION ALL SELECT 2, "2020-06-23", 10
  UNION ALL SELECT 2, "2020-06-23", 10
  UNION ALL SELECT 2, "2020-06-23", 20
)

-- Outer query: keep every per-day statistic and append the moving average of
-- the daily averages (current day plus the two preceding days).
SELECT
  daily_stats.day,
  daily_stats.sum_daily_quantity,
  daily_stats.avg_intra_day_quantity,
  daily_stats.median,
  daily_stats.stdd,
  AVG(daily_stats.avg_intra_day_quantity) OVER (
    ORDER BY daily_stats.day ASC
    ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
  ) AS ma3
FROM (
  -- Per-day statistics computed with window functions on the raw rows.
  -- Every row of a day carries the same values, so DISTINCT reduces the
  -- result to one row per day before the moving average is applied.
  SELECT DISTINCT
    day,
    SUM(quantity) OVER (PARTITION BY day) AS sum_daily_quantity,
    AVG(quantity) OVER (PARTITION BY day) AS avg_intra_day_quantity,
    PERCENTILE_CONT(quantity, 0.5) OVER (PARTITION BY day) AS median,
    STDDEV(quantity) OVER (PARTITION BY day) AS stdd
  FROM data
) AS daily_stats


这是一个使用 GROUP BY 和窗口函数的子查询版本(不使用额外的 CTE):

-- Sample observations: one row per (id, day, quantity); day is a
-- YYYY-MM-DD string literal.
WITH data AS (
  SELECT 1 AS id, "2020-06-20" AS day, 10 AS quantity
  UNION ALL SELECT 2, "2020-06-20", 15
  UNION ALL SELECT 2, "2020-06-20", 20
  UNION ALL SELECT 2, "2020-06-20", 21
  UNION ALL SELECT 2, "2020-06-20", 19

  UNION ALL SELECT 1, "2020-06-21", 5
  UNION ALL SELECT 2, "2020-06-21", 10
  UNION ALL SELECT 2, "2020-06-21", 5

  UNION ALL SELECT 1, "2020-06-22", 9
  UNION ALL SELECT 2, "2020-06-22", 4

  UNION ALL SELECT 2, "2020-06-23", 10
  UNION ALL SELECT 2, "2020-06-23", 10
  UNION ALL SELECT 2, "2020-06-23", 20
)

-- Moving average of the daily averages: current day plus the two preceding
-- days, ordered by day.
SELECT
  daily_avg.day,
  AVG(daily_avg.sq) OVER (
    ORDER BY daily_avg.day ASC
    ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
  ) AS ma3
FROM (
  -- Aggregate first: one row per day with that day's average quantity.
  SELECT
    AVG(quantity) AS sq,
    day
  FROM data
  GROUP BY day
) AS daily_avg


这是一个仅使用窗口函数的子查询版本:

-- Sample observations: one row per (id, day, quantity); day is a
-- YYYY-MM-DD string literal.
WITH data AS (
  SELECT 1 AS id, "2020-06-20" AS day, 10 AS quantity
  UNION ALL SELECT 2, "2020-06-20", 15
  UNION ALL SELECT 2, "2020-06-20", 20
  UNION ALL SELECT 2, "2020-06-20", 21
  UNION ALL SELECT 2, "2020-06-20", 19

  UNION ALL SELECT 1, "2020-06-21", 5
  UNION ALL SELECT 2, "2020-06-21", 10
  UNION ALL SELECT 2, "2020-06-21", 5

  UNION ALL SELECT 1, "2020-06-22", 9
  UNION ALL SELECT 2, "2020-06-22", 4

  UNION ALL SELECT 2, "2020-06-23", 10
  UNION ALL SELECT 2, "2020-06-23", 10
  UNION ALL SELECT 2, "2020-06-23", 20
)

-- Moving average of the daily averages: current day plus the two preceding
-- days, ordered by day.
SELECT
  per_day.day,
  AVG(per_day.intra_day_avg) OVER (
    ORDER BY per_day.day ASC
    ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
  ) AS ma3
FROM (
  -- Window-only variant of the daily average: the per-day average is attached
  -- to every raw row, then DISTINCT collapses the rows to one per day.
  SELECT DISTINCT
    day,
    AVG(quantity) OVER (PARTITION BY day) AS intra_day_avg
  FROM data
) AS per_day



PS / 仅供参考
如果您想计算每日简单移动平均值,应当先按天对数量求和,再对 N 个时间段内的每日总和计算移动平均值,这样得到的才是每日移动平均值。而您目前计算的是日内平均值的移动平均值(即平均值的平均值)。