Clickhouse 移动平均线
Clickhouse moving average
输入:
点击之家
Table一个
business_dttm(日期时间)
金额(浮动)
我需要计算每个 business_dttm
的 15 分钟(或最后 3 条记录)的移动总和
例如
amount business_dttm moving sum
0.3 2018-11-19 13:00:00
0.3 2018-11-19 13:05:00
0.4 2018-11-19 13:10:00 1
0.5 2018-11-19 13:15:00 1.2
0.6 2018-11-19 13:15:00 1.5
0.7 2018-11-19 13:20:00 1.8
0.8 2018-11-19 13:25:00 2.1
0.9 2018-11-19 13:25:00 2.4
0.5 2018-11-19 13:30:00 2.2
很遗憾,我们没有 window Clickhouse
功能和不平等条件加入
没有交叉连接和where条件怎么办?
如果 window 尺寸可数地小,你可以这样做
SELECT
sum(window.2) AS amount,
max(dttm) AS business_dttm,
sum(amt) AS moving_sum
FROM
(
SELECT
arrayJoin([(rowNumberInAllBlocks(), amount), (rowNumberInAllBlocks() + 1, 0), (rowNumberInAllBlocks() + 2, 0)]) AS window,
amount AS amt,
business_dttm AS dttm
FROM
(
SELECT
amount,
business_dttm
FROM A
ORDER BY business_dttm
)
)
GROUP BY window.1
HAVING count() = 3
ORDER BY window.1;
前两行被忽略,因为 ClickHouse 不会将聚合折叠为 null。您可以稍后添加它们。
更新:
仍然可以计算任意 window 大小的移动总和。根据需要调整 window_size
(本例为 3)。
-- Note, rowNumberInAllBlocks is incorrect if declared inside with block due to being stateful
WITH
(
SELECT arrayCumSum(groupArray(amount))
FROM
(
SELECT
amount
FROM A
ORDER BY business_dttm
)
) AS arr,
3 AS window_size
SELECT
amount,
business_dttm,
if(rowNumberInAllBlocks() + 1 < window_size, NULL, arr[rowNumberInAllBlocks() + 1] - arr[rowNumberInAllBlocks() + 1 - window_size]) AS moving_sum
FROM
(
SELECT
amount,
business_dttm
FROM A
ORDER BY business_dttm
)
或者这个变体
SELECT
amount,
business_dttm,
moving_sum
FROM
(
WITH 3 AS window_size
SELECT
groupArray(amount) AS amount_arr,
groupArray(business_dttm) AS business_dttm_arr,
arrayCumSum(amount_arr) AS amount_cum_arr,
arrayMap(i -> if(i < window_size, NULL, amount_cum_arr[i] - amount_cum_arr[(i - window_size)]), arrayEnumerate(amount_cum_arr)) AS moving_sum_arr
FROM
(
SELECT *
FROM A
ORDER BY business_dttm ASC
)
)
ARRAY JOIN
amount_arr AS amount,
business_dttm_arr AS business_dttm,
moving_sum_arr AS moving_sum
公平警告,这两种方法都远非最佳,但它展示了 ClickHouse 超越 SQL 的独特力量。
从 version 21.4 开始,添加了对 window 函数的 完整 支持。此时它被标记为实验性功能。
SELECT
amount,
business_dttm,
sum(amount) OVER (ORDER BY business_dttm ASC ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS sum
FROM (
SELECT data.1 AS amount, toDateTime(data.2) AS business_dttm
FROM (
SELECT arrayJoin([
(0.3, '2018-11-19 13:00:00'),
(0.3, '2018-11-19 13:05:00'),
(0.4, '2018-11-19 13:10:00'),
(0.5, '2018-11-19 13:15:00'),
(0.6, '2018-11-19 13:15:00'),
(0.7, '2018-11-19 13:20:00'),
(0.8, '2018-11-19 13:25:00'),
(0.9, '2018-11-19 13:25:00'),
(0.5, '2018-11-19 13:30:00')]) data)
)
SETTINGS allow_experimental_window_functions = 1
/*
┌─amount─┬───────business_dttm─┬────────────────sum─┐
│ 0.3 │ 2018-11-19 13:00:00 │ 0.3 │
│ 0.3 │ 2018-11-19 13:05:00 │ 0.6 │
│ 0.4 │ 2018-11-19 13:10:00 │ 1 │
│ 0.5 │ 2018-11-19 13:15:00 │ 1.2 │
│ 0.6 │ 2018-11-19 13:15:00 │ 1.5 │
│ 0.7 │ 2018-11-19 13:20:00 │ 1.8 │
│ 0.8 │ 2018-11-19 13:25:00 │ 2.0999999999999996 │
│ 0.9 │ 2018-11-19 13:25:00 │ 2.4 │
│ 0.5 │ 2018-11-19 13:30:00 │ 2.2 │
└────────┴─────────────────────┴────────────────────┘
*/
参见https://altinity.com/blog/clickhouse-window-functions-current-state-of-the-art。
输入: 点击之家
Table一个 business_dttm(日期时间) 金额(浮动)
我需要计算每个 business_dttm
的 15 分钟(或最后 3 条记录)的移动总和例如
amount business_dttm moving sum
0.3 2018-11-19 13:00:00
0.3 2018-11-19 13:05:00
0.4 2018-11-19 13:10:00 1
0.5 2018-11-19 13:15:00 1.2
0.6 2018-11-19 13:15:00 1.5
0.7 2018-11-19 13:20:00 1.8
0.8 2018-11-19 13:25:00 2.1
0.9 2018-11-19 13:25:00 2.4
0.5 2018-11-19 13:30:00 2.2
很遗憾,我们没有 window Clickhouse
功能和不平等条件加入没有交叉连接和where条件怎么办?
如果 window 尺寸可数地小,你可以这样做
SELECT
sum(window.2) AS amount,
max(dttm) AS business_dttm,
sum(amt) AS moving_sum
FROM
(
SELECT
arrayJoin([(rowNumberInAllBlocks(), amount), (rowNumberInAllBlocks() + 1, 0), (rowNumberInAllBlocks() + 2, 0)]) AS window,
amount AS amt,
business_dttm AS dttm
FROM
(
SELECT
amount,
business_dttm
FROM A
ORDER BY business_dttm
)
)
GROUP BY window.1
HAVING count() = 3
ORDER BY window.1;
前两行被忽略,因为 ClickHouse 不会将聚合折叠为 null。您可以稍后添加它们。
更新:
仍然可以计算任意 window 大小的移动总和。根据需要调整 window_size
(本例为 3)。
-- Note, rowNumberInAllBlocks is incorrect if declared inside with block due to being stateful
WITH
(
SELECT arrayCumSum(groupArray(amount))
FROM
(
SELECT
amount
FROM A
ORDER BY business_dttm
)
) AS arr,
3 AS window_size
SELECT
amount,
business_dttm,
if(rowNumberInAllBlocks() + 1 < window_size, NULL, arr[rowNumberInAllBlocks() + 1] - arr[rowNumberInAllBlocks() + 1 - window_size]) AS moving_sum
FROM
(
SELECT
amount,
business_dttm
FROM A
ORDER BY business_dttm
)
或者这个变体
SELECT
amount,
business_dttm,
moving_sum
FROM
(
WITH 3 AS window_size
SELECT
groupArray(amount) AS amount_arr,
groupArray(business_dttm) AS business_dttm_arr,
arrayCumSum(amount_arr) AS amount_cum_arr,
arrayMap(i -> if(i < window_size, NULL, amount_cum_arr[i] - amount_cum_arr[(i - window_size)]), arrayEnumerate(amount_cum_arr)) AS moving_sum_arr
FROM
(
SELECT *
FROM A
ORDER BY business_dttm ASC
)
)
ARRAY JOIN
amount_arr AS amount,
business_dttm_arr AS business_dttm,
moving_sum_arr AS moving_sum
公平警告,这两种方法都远非最佳,但它展示了 ClickHouse 超越 SQL 的独特力量。
从 version 21.4 开始,添加了对 window 函数的 完整 支持。此时它被标记为实验性功能。
SELECT
amount,
business_dttm,
sum(amount) OVER (ORDER BY business_dttm ASC ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS sum
FROM (
SELECT data.1 AS amount, toDateTime(data.2) AS business_dttm
FROM (
SELECT arrayJoin([
(0.3, '2018-11-19 13:00:00'),
(0.3, '2018-11-19 13:05:00'),
(0.4, '2018-11-19 13:10:00'),
(0.5, '2018-11-19 13:15:00'),
(0.6, '2018-11-19 13:15:00'),
(0.7, '2018-11-19 13:20:00'),
(0.8, '2018-11-19 13:25:00'),
(0.9, '2018-11-19 13:25:00'),
(0.5, '2018-11-19 13:30:00')]) data)
)
SETTINGS allow_experimental_window_functions = 1
/*
┌─amount─┬───────business_dttm─┬────────────────sum─┐
│ 0.3 │ 2018-11-19 13:00:00 │ 0.3 │
│ 0.3 │ 2018-11-19 13:05:00 │ 0.6 │
│ 0.4 │ 2018-11-19 13:10:00 │ 1 │
│ 0.5 │ 2018-11-19 13:15:00 │ 1.2 │
│ 0.6 │ 2018-11-19 13:15:00 │ 1.5 │
│ 0.7 │ 2018-11-19 13:20:00 │ 1.8 │
│ 0.8 │ 2018-11-19 13:25:00 │ 2.0999999999999996 │
│ 0.9 │ 2018-11-19 13:25:00 │ 2.4 │
│ 0.5 │ 2018-11-19 13:30:00 │ 2.2 │
└────────┴─────────────────────┴────────────────────┘
*/
参见https://altinity.com/blog/clickhouse-window-functions-current-state-of-the-art。