SQL 用前一个非 NULL 行填充 NULL 行

SQL fill NULL row with previous non NULL row

我有一个 BigQuery(bq)表,它是把若干日历日期快照与交易(trx)数据做 left join 得到的。用于生成该表的查询如下:

  -- Builds a calendar of snapshot dates and left-joins the sample transactions
  -- onto it. Calendar dates without a transaction come back with NULL
  -- date / id / num; those are the rows that later need to be filled.
  WITH date_array_table AS (
    -- NOTE: the range starts at "today - 20 days" and ends at the fixed date
    -- 2020-08-22. GENERATE_DATE_ARRAY returns an empty array when the start is
    -- after the end, so the calendar is empty unless the query is run within
    -- 20 days of 2020-08-22.
    SELECT
      GENERATE_DATE_ARRAY(
        DATE_ADD(DATE(CURRENT_TIMESTAMP), INTERVAL -20 DAY),
        DATE('2020-08-22')
      ) AS date_array
  )

  -- One row per calendar (snapshot) date.
  ,dim_date AS (
    SELECT
      sn_date
    FROM date_array_table
    CROSS JOIN UNNEST(date_array) AS sn_date
  )

  -- Sample transaction data: (date, id, num).
  ,data_test AS (
    SELECT DATE('2020-08-20') AS date, 1 AS id, 1000 AS num
    UNION ALL
    SELECT DATE('2020-08-18') AS date, 1 AS id, 130 AS num
    UNION ALL
    SELECT DATE('2020-08-18') AS date, 2 AS id, 300 AS num
    UNION ALL
    SELECT DATE('2020-08-13') AS date, 2 AS id, 250 AS num
  )

  -- Calendar left-joined to transactions on the calendar date.
  ,jjoin AS (
    SELECT
      sn_date,
      date,
      id,
      num
    FROM dim_date
    LEFT JOIN data_test
      ON sn_date = date
  )

  SELECT
    sn_date,
    date,
    id,
    num
  FROM jjoin
  ORDER BY sn_date DESC

结果如下(原帖此处为结果截图):

接下来,我想把值为 NULL 的快照行,按日期用每个 id 的前一个非 NULL 行来填充。我尝试过使用 max 或 first_value,但结果仍然是 NULL。示例:

-- Attempted fill (does NOT produce the desired result).
-- The calendar rows that have no matching transaction carry NULL id and NULL
-- date after the left join, so PARTITION BY id puts them all in one partition
-- whose num values are all NULL; MAX(num) over that partition is NULL too.
select
  sn_date,
  coalesce(num, max(num) over (partition by id order by date))
from jjoin

但它不显示以前的非空行。有什么建议吗?谢谢

预期:

--------------------------
sn_date | date | id | num
--------------------------
08/22   | 08/20| 1  | 1000
08/21   | 08/20| 1  | 1000
08/20   | 08/20| 1  | 1000
08/19   | 08/18| 1  | 130
08/18   | 08/18| 1  | 130
08/18   | 08/18| 2  | 300
08/17   | 08/13| 1  | 250
08/16   | 08/13| 1  | 250
08/15   | 08/13| 1  | 250

您可以使用 last_value():

-- Forward-fill with LAST_VALUE(... IGNORE NULLS): walk the snapshots from the
-- oldest to the current one and take the most recent non-NULL value seen.
-- The window must be ordered by sn_date (always populated), not by date, which
-- is NULL on exactly the rows that need filling.
-- Ties on sn_date are ordered by id DESC so that the lowest id is the last row
-- of the tie and therefore the one carried forward, making the fill
-- deterministic.
select sn_date, date, id, num,
       last_value(date ignore nulls) over w as filled_date,
       last_value(id ignore nulls) over w as filled_id,
       last_value(num ignore nulls) over w as filled_num
from jjoin
window w as (
  order by sn_date, id desc
  rows between unbounded preceding and current row
)

我应该指出,SQL 标准在 lag() 以及 first_value() 和 last_value() 上都支持 ignore nulls。考虑这个问题时,我首先想到的是 lag()。我认为 BigQuery 是唯一支持 ignore nulls、但不支持在 lag() 上使用它的数据库。

以下适用于 BigQuery 标准 SQL

#standardSQL
-- Fill each snapshot row with the nearest non-NULL values at or before it.
-- Rows are ordered newest-first and the frame runs from the current row to the
-- end, so FIRST_VALUE(... IGNORE NULLS) returns the row's own value when it is
-- populated, otherwise the value of the closest older populated row.
-- `id` is a tie-breaker: when several rows share one sn_date, the lowest id is
-- the one carried into the following snapshots, which keeps the result
-- deterministic.
SELECT
  sn_date,
  FIRST_VALUE(date IGNORE NULLS) OVER (win) AS date,
  FIRST_VALUE(id IGNORE NULLS) OVER (win) AS id,
  FIRST_VALUE(num IGNORE NULLS) OVER (win) AS num
FROM your_current_result
WINDOW win AS (
  ORDER BY sn_date DESC, id
  ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
)
-- Explicit output order; without it the row order is not guaranteed.
ORDER BY sn_date DESC, id

如果应用到问题中显示的当前结果,如下例

#standardSQL
-- Worked example: your_current_result reproduces the joined table from the
-- question (NULL date/id/num on snapshot dates that have no transaction).
WITH your_current_result AS (
  SELECT DATE '2020-08-20' sn_date, DATE '2020-08-20' date, 1 id, 1000 num UNION ALL
  SELECT '2020-08-22', NULL, NULL, NULL UNION ALL
  SELECT '2020-08-21', NULL, NULL, NULL UNION ALL
  SELECT '2020-08-19', NULL, NULL, NULL UNION ALL
  SELECT '2020-08-18', '2020-08-18', 1, 130 UNION ALL
  SELECT '2020-08-18', '2020-08-18', 2, 300 UNION ALL
  SELECT '2020-08-17', NULL, NULL, NULL UNION ALL
  SELECT '2020-08-16', NULL, NULL, NULL UNION ALL
  SELECT '2020-08-15', NULL, NULL, NULL UNION ALL
  SELECT '2020-08-14', NULL, NULL, NULL UNION ALL
  SELECT '2020-08-13', '2020-08-13', 1, 250 
)
-- Rows are ordered newest-first and the frame runs from the current row to the
-- end, so FIRST_VALUE(... IGNORE NULLS) returns the row's own value when it is
-- populated, otherwise the value of the closest older populated row.
-- `id` breaks the tie between the two 2020-08-18 rows, so 2020-08-19 is always
-- filled from id 1 rather than from whichever row happens to sort first.
SELECT
  sn_date,
  FIRST_VALUE(date IGNORE NULLS) OVER (win) AS date,
  FIRST_VALUE(id IGNORE NULLS) OVER (win) AS id,
  FIRST_VALUE(num IGNORE NULLS) OVER (win) AS num
FROM your_current_result
WINDOW win AS (
  ORDER BY sn_date DESC, id
  ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
)
-- Explicit output order; without it the row order is not guaranteed.
ORDER BY sn_date DESC, id

结果是

Row sn_date     date        id  num  
1   2020-08-22  2020-08-20  1   1000     
2   2020-08-21  2020-08-20  1   1000     
3   2020-08-20  2020-08-20  1   1000     
4   2020-08-19  2020-08-18  1   130  
5   2020-08-18  2020-08-18  1   130  
6   2020-08-18  2020-08-18  2   300  
7   2020-08-17  2020-08-13  1   250  
8   2020-08-16  2020-08-13  1   250  
9   2020-08-15  2020-08-13  1   250  
10  2020-08-14  2020-08-13  1   250  
11  2020-08-13  2020-08-13  1   250