功能与 clickhouse 中的 lag partition by 相同
function same as lag partition by in clickouse
我需要知道每个用户的订购频率。我的意思是每个用户的 2 个订购时间之间的差异。
在 SQL 中,我使用了“Lag Partition by”,但我不知道如何在 click house 中计算它。
我需要这些数据:
首先我应该用 user_id 和 created_at 对数据进行排序然后我需要为行中的每个用户 ID 设置下一个订单时间。我不能使用邻居功能,因为它不能按 user_id.
进行分区
我不明白为什么 neighbor 不能用于你的情况,但它应该运行良好:
-- For every order row, look up the same user's next order time.
-- The inner query sorts by (user_id, created), so the following row is either
-- this user's next order or the first order of the next user.
-- NOTE(review): the ClickHouse docs describe neighbor() as operating within a
-- single data block; confirm the result on inputs larger than one block.
SELECT
    user_id,
    created,
    if(
        neighbor(user_id, 1, NULL) = user_id,
        neighbor(created, 1, NULL),  -- following row is the same user: take its time
        NULL                         -- following row is another user, or there is none
    ) AS next_created
FROM
(
    -- Demo data: 11 rows spread over user ids 0..2, spaced 360 seconds apart.
    SELECT
        number % 3 AS user_id,
        now() + (number * 360) AS created
    FROM numbers(11)
    ORDER BY
        user_id ASC,
        created ASC
)
/*
┌─user_id─┬─────────────created─┬────────next_created─┐
│ 0 │ 2020-10-21 16:00:21 │ 2020-10-21 16:18:21 │
│ 0 │ 2020-10-21 16:18:21 │ 2020-10-21 16:36:21 │
│ 0 │ 2020-10-21 16:36:21 │ 2020-10-21 16:54:21 │
│ 0 │ 2020-10-21 16:54:21 │ ᴺᵁᴸᴸ │
│ 1 │ 2020-10-21 16:06:21 │ 2020-10-21 16:24:21 │
│ 1 │ 2020-10-21 16:24:21 │ 2020-10-21 16:42:21 │
│ 1 │ 2020-10-21 16:42:21 │ 2020-10-21 17:00:21 │
│ 1 │ 2020-10-21 17:00:21 │ ᴺᵁᴸᴸ │
│ 2 │ 2020-10-21 16:12:21 │ 2020-10-21 16:30:21 │
│ 2 │ 2020-10-21 16:30:21 │ 2020-10-21 16:48:21 │
│ 2 │ 2020-10-21 16:48:21 │ ᴺᵁᴸᴸ │
└─────────┴─────────────────────┴─────────────────────┘
*/
groupArray
允许转换这个
-- Sample input: three time values (555, 666, 777) for the single user uid = 1.
-- Only the first SELECT names the columns (uid, time).
SELECT 1 AS uid, 555 AS time
UNION ALL
SELECT 1, 666
UNION ALL
SELECT 1, 777
┌─uid─┬─time─┐
│ 1 │ 555 │
│ 1 │ 666 │
│ 1 │ 777 │
└─────┴──────┘
至此
-- Collect each uid's time values into one array in ascending order.
-- arraySort() fixes the element order on the aggregate itself, so the result
-- does not depend on a sorted subquery feeding groupArray() (that ordering is
-- only reliable for small subquery results).
SELECT
    uid,
    arraySort(groupArray(time)) AS dtime
FROM
(
    -- Sample input: three time values for uid = 1.
    SELECT 1 AS uid, 555 AS time
    UNION ALL
    SELECT 1, 666
    UNION ALL
    SELECT 1, 777
)
GROUP BY uid
┌─uid─┬─dtime─────────┐
│ 1 │ [555,666,777] │
└─────┴───────────────┘
和
-- Emit, per uid, the "next" time for each row: the ascending array of times is
-- shifted left by one (arrayPopFront) and padded with NULL (arrayPushBack),
-- then unfolded back into rows with arrayJoin().
-- arraySort() makes the order explicit instead of relying on a sorted subquery
-- feeding groupArray().
SELECT
    uid,
    arrayJoin(
        arrayPushBack(
            arrayPopFront(arraySort(groupArray(time))),
            NULL
        )
    ) AS dtime
FROM
(
    -- Sample input: three time values for uid = 1.
    SELECT 1 AS uid, 555 AS time
    UNION ALL
    SELECT 1, 666
    UNION ALL
    SELECT 1, 777
)
GROUP BY uid
┌─uid─┬─dtime─┐
│ 1 │ 666 │
│ 1 │ 777 │
│ 1 │ ᴺᵁᴸᴸ │
└─────┴───────┘
-- Pair every time value with the same uid's next time value (NULL for the last).
--   stime: the uid's times in ascending order
--   dtime: stime shifted left by one and padded with NULL
-- ARRAY JOIN over both arrays unfolds them in parallel, one row per position.
-- arraySort() makes the order explicit instead of relying on a sorted subquery
-- feeding groupArray().
SELECT
    uid,
    time,
    atime
FROM
(
    SELECT
        uid,
        arraySort(groupArray(time)) AS stime,
        arrayPushBack(arrayPopFront(stime), NULL) AS dtime
    FROM
    (
        -- Sample input: three time values for uid = 1.
        SELECT 1 AS uid, 555 AS time
        UNION ALL
        SELECT 1, 666
        UNION ALL
        SELECT 1, 777
    )
    GROUP BY uid
)
ARRAY JOIN
    stime AS time,
    dtime AS atime
┌─uid─┬─time─┬─atime─┐
│ 1 │ 555 │ 666 │
│ 1 │ 666 │ 777 │
│ 1 │ 777 │ ᴺᵁᴸᴸ │
└─────┴──────┴───────┘
我需要知道每个用户的订购频率。我的意思是每个用户的 2 个订购时间之间的差异。
在 SQL 中,我使用了“Lag Partition by”,但我不知道如何在 click house 中计算它。
我需要这些数据:
首先我应该用 user_id 和 created_at 对数据进行排序然后我需要为行中的每个用户 ID 设置下一个订单时间。我不能使用邻居功能,因为它不能按 user_id.
进行分区我不明白为什么 neighbor 不能用于你的情况,但它应该运行良好:
-- For every order row, look up the same user's next order time.
-- The inner query sorts by (user_id, created), so the following row is either
-- this user's next order or the first order of the next user.
-- NOTE(review): the ClickHouse docs describe neighbor() as operating within a
-- single data block; confirm the result on inputs larger than one block.
SELECT
    user_id,
    created,
    if(
        neighbor(user_id, 1, NULL) = user_id,
        neighbor(created, 1, NULL),  -- following row is the same user: take its time
        NULL                         -- following row is another user, or there is none
    ) AS next_created
FROM
(
    -- Demo data: 11 rows spread over user ids 0..2, spaced 360 seconds apart.
    SELECT
        number % 3 AS user_id,
        now() + (number * 360) AS created
    FROM numbers(11)
    ORDER BY
        user_id ASC,
        created ASC
)
/*
┌─user_id─┬─────────────created─┬────────next_created─┐
│ 0 │ 2020-10-21 16:00:21 │ 2020-10-21 16:18:21 │
│ 0 │ 2020-10-21 16:18:21 │ 2020-10-21 16:36:21 │
│ 0 │ 2020-10-21 16:36:21 │ 2020-10-21 16:54:21 │
│ 0 │ 2020-10-21 16:54:21 │ ᴺᵁᴸᴸ │
│ 1 │ 2020-10-21 16:06:21 │ 2020-10-21 16:24:21 │
│ 1 │ 2020-10-21 16:24:21 │ 2020-10-21 16:42:21 │
│ 1 │ 2020-10-21 16:42:21 │ 2020-10-21 17:00:21 │
│ 1 │ 2020-10-21 17:00:21 │ ᴺᵁᴸᴸ │
│ 2 │ 2020-10-21 16:12:21 │ 2020-10-21 16:30:21 │
│ 2 │ 2020-10-21 16:30:21 │ 2020-10-21 16:48:21 │
│ 2 │ 2020-10-21 16:48:21 │ ᴺᵁᴸᴸ │
└─────────┴─────────────────────┴─────────────────────┘
*/
groupArray
允许转换这个
-- Sample input: three time values (555, 666, 777) for the single user uid = 1.
-- Only the first SELECT names the columns (uid, time).
SELECT 1 AS uid, 555 AS time
UNION ALL
SELECT 1, 666
UNION ALL
SELECT 1, 777
┌─uid─┬─time─┐
│ 1 │ 555 │
│ 1 │ 666 │
│ 1 │ 777 │
└─────┴──────┘
至此
-- Collect each uid's time values into one array in ascending order.
-- arraySort() fixes the element order on the aggregate itself, so the result
-- does not depend on a sorted subquery feeding groupArray() (that ordering is
-- only reliable for small subquery results).
SELECT
    uid,
    arraySort(groupArray(time)) AS dtime
FROM
(
    -- Sample input: three time values for uid = 1.
    SELECT 1 AS uid, 555 AS time
    UNION ALL
    SELECT 1, 666
    UNION ALL
    SELECT 1, 777
)
GROUP BY uid
┌─uid─┬─dtime─────────┐
│ 1 │ [555,666,777] │
└─────┴───────────────┘
和
-- Emit, per uid, the "next" time for each row: the ascending array of times is
-- shifted left by one (arrayPopFront) and padded with NULL (arrayPushBack),
-- then unfolded back into rows with arrayJoin().
-- arraySort() makes the order explicit instead of relying on a sorted subquery
-- feeding groupArray().
SELECT
    uid,
    arrayJoin(
        arrayPushBack(
            arrayPopFront(arraySort(groupArray(time))),
            NULL
        )
    ) AS dtime
FROM
(
    -- Sample input: three time values for uid = 1.
    SELECT 1 AS uid, 555 AS time
    UNION ALL
    SELECT 1, 666
    UNION ALL
    SELECT 1, 777
)
GROUP BY uid
┌─uid─┬─dtime─┐
│ 1 │ 666 │
│ 1 │ 777 │
│ 1 │ ᴺᵁᴸᴸ │
└─────┴───────┘
-- Pair every time value with the same uid's next time value (NULL for the last).
--   stime: the uid's times in ascending order
--   dtime: stime shifted left by one and padded with NULL
-- ARRAY JOIN over both arrays unfolds them in parallel, one row per position.
-- arraySort() makes the order explicit instead of relying on a sorted subquery
-- feeding groupArray().
SELECT
    uid,
    time,
    atime
FROM
(
    SELECT
        uid,
        arraySort(groupArray(time)) AS stime,
        arrayPushBack(arrayPopFront(stime), NULL) AS dtime
    FROM
    (
        -- Sample input: three time values for uid = 1.
        SELECT 1 AS uid, 555 AS time
        UNION ALL
        SELECT 1, 666
        UNION ALL
        SELECT 1, 777
    )
    GROUP BY uid
)
ARRAY JOIN
    stime AS time,
    dtime AS atime
┌─uid─┬─time─┬─atime─┐
│ 1 │ 555 │ 666 │
│ 1 │ 666 │ 777 │
│ 1 │ 777 │ ᴺᵁᴸᴸ │
└─────┴──────┴───────┘