如何使用 mysql 中的最后一个值 windows 分析函数结转时间序列中缺失日期的值
How to carrying over values for missing dates in time series using last value windows analytical functions in mysql
如何结转缺失日期的值 postcode/indicator_category 以创建完整的月度时间序列。我正在尝试使用 last_value 来继承值,但无法实现。我的方法正确吗?非常感谢任何帮助。
示例给定 table:
create table test.indicator_data(
postcode text,
month_ts date,
indicator_cat integer,
measure double precision);
将值插入到 indicator_data table
INSERT INTO test.indicator_data
VALUES
('sw5', '2017-07-01', 2, 99212.231),
('sw5', '2018-02-01', 2, 232.215),
('sw5', '2017-11-01', 3, 1523.2576),
('sw5', '2017-12-01', 3, 152.16),
('sw5', '2018-02-01', 3, 142.981),
('sw5', '2018-07-01', 3 , 142.1361),
('sw5 9', '2018-03-01', 2, 821.21),
('sw5 9', '2018-02-01', 2, 1182.19);
输入:
postcode
month_ts
indicator_cat
measure
sw5
2017-07-01
2
99212.231
sw5
2018-02-01
2
232.215
sw5
2017-11-01
3
1523.2576
sw5
2017-12-01
3
152.16
sw5
2018-02-01
3
142.981
sw5
2018-07-01
3
142.1361
sw59
2018-03-01
2
821.21
sw59
2018-02-01
2
1182.19
预期输出:
postcode
month_ts
indicator_cat
measure
sw5
2017-07-01
2
99212.231
sw5
2017-08-01
2
99212.231
sw5
2017-09-01
2
99212.231
sw5
2017-10-01
2
99212.231
sw5
2017-11-01
2
99212.231
sw5
2017-12-01
2
99212.231
sw5
2018-01-01
2
99212.231
sw5
2018-02-01
2
232.215
sw5
2017-11-01
3
1523.2576
sw5
2017-12-01
3
152.16
sw5
2018-01-01
3
152.16
sw5
2018-02-01
3
142.981
sw5
2018-03-01
3
142.981
sw5
2018-04-01
3
142.981
sw5
2018-05-01
3
142.981
sw5
2018-06-01
3
142.981
sw5
2018-07-01
3
142.1361
sw59
2018-02-01
2
1182.19
sw59
2018-03-01
2
821.21
尝试过的解决方案:
使用以下程序创建日历 table:
DELIMITER |
CREATE PROCEDURE test.fill_calendar(`start_date` DATE, `end_date` DATE)
BEGIN
DECLARE `crt_date` DATE;
SET `crt_date`=start_date;
WHILE `crt_date` < `end_date` DO
INSERT INTO calendar VALUES(`crt_date`);
SET `crt_date` = ADDDATE(`crt_date`, INTERVAL 1 MONTH);
END WHILE;
END |
DELIMITER ;
调用过程
CALL test.fill_calendar('2017-07-01', '2018-07-01');
查询试图丢失日期,但未能获得 last_values
select postcode,last_value(postcode) over (order by datefield) from (
SELECT indicator_data.postcode,calendar.datefield AS date,indicator_data.indicator_cat,indicator_data.measure
FROM indicator_data RIGHT join calendar ON (DATE(indicator_data.month_ts) = calendar.datefield)
WHERE (calendar.datefield BETWEEN (SELECT MIN(DATE(month_ts)) FROM indicator_data) AND (SELECT MAX(DATE(month_ts)) FROM indicator_data))
GROUP BY date) as test;
递归CTE可以得到预期的结果:
WITH RECURSIVE
max_info AS (
SELECT postcode, indicator_cat, MAX(month_ts) month_ts
FROM indicator_data
GROUP BY postcode, indicator_cat
),
fill_info AS (
SELECT postcode, indicator_cat, month_ts, measure
FROM indicator_data
UNION ALL
SELECT postcode, indicator_cat, month_ts + INTERVAL 1 MONTH month_ts, measure
FROM fill_info i
WHERE NOT EXISTS (
SELECT 1
FROM indicator_data d
WHERE d.postcode = i.postcode
AND d.indicator_cat = i.indicator_cat
AND d.month_ts = i.month_ts + INTERVAL 1 MONTH
)
AND EXISTS (
SELECT 1
FROM max_info m
WHERE m.postcode = i.postcode
AND m.indicator_cat = i.indicator_cat
AND m.month_ts > i.month_ts + INTERVAL 1 MONTH
)
)
SELECT postcode, month_ts, indicator_cat, measure
FROM fill_info
ORDER BY postcode, indicator_cat, month_ts
您可以使用递归 CTE 创建日历 table 的结果集。
然后将日历 LEFT
连接到 table 并使用 SUM()
window 函数在 measure
中创建具有 null
的行组。
最后使用 MAX()
window 函数选择最后一个非空值 measure
:
WITH
RECURSIVE dates AS (
SELECT postcode, indicator_cat, MIN(month_ts) month_ts, MAX(month_ts) max_month_ts
FROM indicator_data
GROUP BY postcode, indicator_cat
UNION ALL
SELECT postcode, indicator_cat, month_ts + INTERVAL 1 MONTH, max_month_ts
FROM dates
WHERE month_ts + INTERVAL 1 MONTH <= max_month_ts
),
cte AS (
SELECT d.postcode, d.month_ts, d.indicator_cat, i.measure,
SUM(i.measure IS NOT NULL) OVER (PARTITION BY d.postcode, d.indicator_cat ORDER BY d.month_ts) grp
FROM dates d LEFT JOIN indicator_data i
ON (i.postcode, i.indicator_cat, i.month_ts) = (d.postcode, d.indicator_cat, d.month_ts)
)
SELECT postcode, month_ts, indicator_cat,
MAX(measure) OVER (PARTITION BY postcode, indicator_cat, grp) measure
FROM cte
ORDER BY postcode, indicator_cat, month_ts;
参见demo。
如何结转缺失日期的值 postcode/indicator_category 以创建完整的月度时间序列。我正在尝试使用 last_value 来继承值,但无法实现。我的方法正确吗?非常感谢任何帮助。
示例给定 table:
create table test.indicator_data(
postcode text,
month_ts date,
indicator_cat integer,
measure double precision);
将值插入到 indicator_data table
INSERT INTO test.indicator_data
VALUES
('sw5', '2017-07-01', 2, 99212.231),
('sw5', '2018-02-01', 2, 232.215),
('sw5', '2017-11-01', 3, 1523.2576),
('sw5', '2017-12-01', 3, 152.16),
('sw5', '2018-02-01', 3, 142.981),
('sw5', '2018-07-01', 3 , 142.1361),
('sw5 9', '2018-03-01', 2, 821.21),
('sw5 9', '2018-02-01', 2, 1182.19);
输入:
postcode | month_ts | indicator_cat | measure |
---|---|---|---|
sw5 | 2017-07-01 | 2 | 99212.231 |
sw5 | 2018-02-01 | 2 | 232.215 |
sw5 | 2017-11-01 | 3 | 1523.2576 |
sw5 | 2017-12-01 | 3 | 152.16 |
sw5 | 2018-02-01 | 3 | 142.981 |
sw5 | 2018-07-01 | 3 | 142.1361 |
sw59 | 2018-03-01 | 2 | 821.21 |
sw59 | 2018-02-01 | 2 | 1182.19 |
预期输出:
postcode | month_ts | indicator_cat | measure |
---|---|---|---|
sw5 | 2017-07-01 | 2 | 99212.231 |
sw5 | 2017-08-01 | 2 | 99212.231 |
sw5 | 2017-09-01 | 2 | 99212.231 |
sw5 | 2017-10-01 | 2 | 99212.231 |
sw5 | 2017-11-01 | 2 | 99212.231 |
sw5 | 2017-12-01 | 2 | 99212.231 |
sw5 | 2018-01-01 | 2 | 99212.231 |
sw5 | 2018-02-01 | 2 | 232.215 |
sw5 | 2017-11-01 | 3 | 1523.2576 |
sw5 | 2017-12-01 | 3 | 152.16 |
sw5 | 2018-01-01 | 3 | 152.16 |
sw5 | 2018-02-01 | 3 | 142.981 |
sw5 | 2018-03-01 | 3 | 142.981 |
sw5 | 2018-04-01 | 3 | 142.981 |
sw5 | 2018-05-01 | 3 | 142.981 |
sw5 | 2018-06-01 | 3 | 142.981 |
sw5 | 2018-07-01 | 3 | 142.1361 |
sw59 | 2018-02-01 | 2 | 1182.19 |
sw59 | 2018-03-01 | 2 | 821.21 |
尝试过的解决方案:
使用以下程序创建日历 table:
DELIMITER |
CREATE PROCEDURE test.fill_calendar(`start_date` DATE, `end_date` DATE)
BEGIN
DECLARE `crt_date` DATE;
SET `crt_date`=start_date;
WHILE `crt_date` < `end_date` DO
INSERT INTO calendar VALUES(`crt_date`);
SET `crt_date` = ADDDATE(`crt_date`, INTERVAL 1 MONTH);
END WHILE;
END |
DELIMITER ;
调用过程
CALL test.fill_calendar('2017-07-01', '2018-07-01');
查询试图丢失日期,但未能获得 last_values
select postcode,last_value(postcode) over (order by datefield) from (
SELECT indicator_data.postcode,calendar.datefield AS date,indicator_data.indicator_cat,indicator_data.measure
FROM indicator_data RIGHT join calendar ON (DATE(indicator_data.month_ts) = calendar.datefield)
WHERE (calendar.datefield BETWEEN (SELECT MIN(DATE(month_ts)) FROM indicator_data) AND (SELECT MAX(DATE(month_ts)) FROM indicator_data))
GROUP BY date) as test;
递归CTE可以得到预期的结果:
WITH RECURSIVE
max_info AS (
SELECT postcode, indicator_cat, MAX(month_ts) month_ts
FROM indicator_data
GROUP BY postcode, indicator_cat
),
fill_info AS (
SELECT postcode, indicator_cat, month_ts, measure
FROM indicator_data
UNION ALL
SELECT postcode, indicator_cat, month_ts + INTERVAL 1 MONTH month_ts, measure
FROM fill_info i
WHERE NOT EXISTS (
SELECT 1
FROM indicator_data d
WHERE d.postcode = i.postcode
AND d.indicator_cat = i.indicator_cat
AND d.month_ts = i.month_ts + INTERVAL 1 MONTH
)
AND EXISTS (
SELECT 1
FROM max_info m
WHERE m.postcode = i.postcode
AND m.indicator_cat = i.indicator_cat
AND m.month_ts > i.month_ts + INTERVAL 1 MONTH
)
)
SELECT postcode, month_ts, indicator_cat, measure
FROM fill_info
ORDER BY postcode, indicator_cat, month_ts
您可以使用递归 CTE 创建日历 table 的结果集。
然后将日历 LEFT
连接到 table 并使用 SUM()
window 函数在 measure
中创建具有 null
的行组。
最后使用 MAX()
window 函数选择最后一个非空值 measure
:
WITH
RECURSIVE dates AS (
SELECT postcode, indicator_cat, MIN(month_ts) month_ts, MAX(month_ts) max_month_ts
FROM indicator_data
GROUP BY postcode, indicator_cat
UNION ALL
SELECT postcode, indicator_cat, month_ts + INTERVAL 1 MONTH, max_month_ts
FROM dates
WHERE month_ts + INTERVAL 1 MONTH <= max_month_ts
),
cte AS (
SELECT d.postcode, d.month_ts, d.indicator_cat, i.measure,
SUM(i.measure IS NOT NULL) OVER (PARTITION BY d.postcode, d.indicator_cat ORDER BY d.month_ts) grp
FROM dates d LEFT JOIN indicator_data i
ON (i.postcode, i.indicator_cat, i.month_ts) = (d.postcode, d.indicator_cat, d.month_ts)
)
SELECT postcode, month_ts, indicator_cat,
MAX(measure) OVER (PARTITION BY postcode, indicator_cat, grp) measure
FROM cte
ORDER BY postcode, indicator_cat, month_ts;
参见demo。