我如何在 clickhouse 中聚合时计算元素
How can i count elements while aggregate in clickhouse
我有这样的数据
customer_id - product_id
1 - 10
1 - 11
1 - 12
1 - 11
2 - 15
2 - 20
聚合后我想得到:
customer_id - product_id
1 - {10:1, 11:2, 12:1}
2 - {15:1, 20:1}
试试这个查询:
SELECT
customer_id,
arrayMap((product_id, count) -> (product_id, count),
untuple(sumMap([product_id], [1]))) AS result
FROM
(
/* Emulate the test dataset. */
SELECT
data.1 AS customer_id,
data.2 AS product_id
FROM
(
SELECT arrayJoin([(1, 10), (1, 11), (1, 12), (1, 11),
(2, 15), (2, 20)]) AS data
)
)
GROUP BY customer_id
/*
┌─customer_id─┬─result─────────────────┐
│ 1 │ [(10,1),(11,2),(12,1)] │
│ 2 │ [(15,1),(20,1)] │
└─────────────┴────────────────────────┘
*/
WITH dataset AS
(
SELECT
data.1 AS customer_id,
data.2 AS product_id
FROM
(
SELECT arrayJoin([
(1, 10), (1, 11), (1, 12), (1, 11), (2, 15), (2, 20)
]) AS data
)
)
SELECT
customer_id,
arrayMap(
x -> (x, arrayCount(y -> (y = x), groupArray(product_id) AS product_ids)),
arrayDistinct(product_ids)
) AS result
FROM dataset
GROUP BY customer_id
┌─customer_id─┬─result─────────────────┐
│ 1 │ [(10,1),(11,2),(12,1)] │
│ 2 │ [(15,1),(20,1)] │
└─────────────┴────────────────────────┘
我有这样的数据
customer_id - product_id
1 - 10
1 - 11
1 - 12
1 - 11
2 - 15
2 - 20
聚合后我想得到:
customer_id - product_id
1 - {10:1, 11:2, 12:1}
2 - {15:1, 20:1}
试试这个查询:
SELECT
customer_id,
arrayMap((product_id, count) -> (product_id, count),
untuple(sumMap([product_id], [1]))) AS result
FROM
(
/* Emulate the test dataset. */
SELECT
data.1 AS customer_id,
data.2 AS product_id
FROM
(
SELECT arrayJoin([(1, 10), (1, 11), (1, 12), (1, 11),
(2, 15), (2, 20)]) AS data
)
)
GROUP BY customer_id
/*
┌─customer_id─┬─result─────────────────┐
│ 1 │ [(10,1),(11,2),(12,1)] │
│ 2 │ [(15,1),(20,1)] │
└─────────────┴────────────────────────┘
*/
WITH dataset AS
(
SELECT
data.1 AS customer_id,
data.2 AS product_id
FROM
(
SELECT arrayJoin([
(1, 10), (1, 11), (1, 12), (1, 11), (2, 15), (2, 20)
]) AS data
)
)
SELECT
customer_id,
arrayMap(
x -> (x, arrayCount(y -> (y = x), groupArray(product_id) AS product_ids)),
arrayDistinct(product_ids)
) AS result
FROM dataset
GROUP BY customer_id
┌─customer_id─┬─result─────────────────┐
│ 1 │ [(10,1),(11,2),(12,1)] │
│ 2 │ [(15,1),(20,1)] │
└─────────────┴────────────────────────┘