如何在 ClickHouse 中把数组里唯一的键值对选取到物化视图中?
How do I select unique key, value pairs from an array into a materialized view in Clickhouse?
表结构如下:
-- Source table from the question: one row per metric sample, with the
-- tags stored as a Nested structure of parallel key/value arrays.
-- The engine is declared with the legacy positional form
-- MergeTree(date_column, (primary_key), index_granularity).
CREATE TABLE metrics
(
    created_date Date,
    created_at   DateTime,
    host         LowCardinality(String),
    metrics_name LowCardinality(String),
    tags         Nested(
                     key   LowCardinality(String),
                     value LowCardinality(String)
                 )
)
ENGINE = MergeTree(created_date, (created_at), 8192)
如何生成结构如下的物化视图?

| Host | Key | Values |
|---|---|---|
| host | Key: X | Values : ["uniq1", "uniq2"] |
考虑使用这个 MV:
-- Materialized view over metrics: unfolds the Nested tags with ARRAY JOIN,
-- groups by host and tag key, and gathers the tag values into one array.
CREATE MATERIALIZED VIEW metrics_mv
ENGINE = MergeTree()
ORDER BY host
AS
SELECT
    host,
    tags.key AS key,
    -- use groupUniqArray(tags.value) instead to keep only distinct values
    groupArray(tags.value) AS values
FROM metrics
ARRAY JOIN tags
GROUP BY
    host,
    tags.key
MergeTree(created_date, (created_at), 8192)
这种语法早在三年多以前就已被弃用,请尽量不要再使用,它最终会被移除。建议改用如下写法:
ENGINE = MergeTree partition by toYYYYMM(created_date) ORDER by (created_at)
-- Source table declared with the current MergeTree syntax: monthly
-- partitions derived from created_date, rows sorted by created_at.
-- tags is a Nested structure, i.e. parallel arrays tags.key / tags.value.
CREATE TABLE metrics
(
    `created_date` Date,
    `created_at` DateTime,
    `host` LowCardinality(String),
    `metrics_name` LowCardinality(String),
    `tags` Nested(key LowCardinality(String), value LowCardinality(String))
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(created_date)
ORDER BY (created_at);  -- terminator added so the next statement parses separately
-- Materialized view keyed by (host, key) that keeps the set of distinct tag
-- values per key. u_values is a SimpleAggregateFunction column, so rows that
-- share the same (host, key) are combined with groupUniqArrayArray when
-- AggregatingMergeTree merges parts.
CREATE MATERIALIZED VIEW metrics_mv
(
    host LowCardinality(String),
    key LowCardinality(String),
    u_values SimpleAggregateFunction(groupUniqArrayArray, Array(String))
)
ENGINE = AggregatingMergeTree
ORDER BY (host, key)
AS
SELECT
    host,
    tags.key AS key,
    groupUniqArray(tags.value) AS u_values
FROM metrics
ARRAY JOIN tags  -- one row per (key, value) pair of the Nested column
GROUP BY
    host,
    key;  -- terminator added so the following INSERT parses separately
-- Sample data, kept as four separate INSERT statements. Rows 1 and 2 are
-- identical, row 3 adds a second value for k1 on h1, row 4 is another host.
-- Columns are named explicitly; the Nested column is written through its
-- two component arrays tags.key and tags.value.
INSERT INTO metrics (created_date, created_at, host, metrics_name, tags.key, tags.value)
VALUES (today(), now(), 'h1', 'm1', ['k1','k2'], ['v1', 'v2']);
INSERT INTO metrics (created_date, created_at, host, metrics_name, tags.key, tags.value)
VALUES (today(), now(), 'h1', 'm1', ['k1','k2'], ['v1', 'v2']);
INSERT INTO metrics (created_date, created_at, host, metrics_name, tags.key, tags.value)
VALUES (today(), now(), 'h1', 'm1', ['k1','k3'], ['v11', 'v2']);
INSERT INTO metrics (created_date, created_at, host, metrics_name, tags.key, tags.value)
VALUES (today(), now(), 'h2', 'm1', ['k1','k3'], ['v1', 'v22']);
-- Force a merge of the view's parts so the rows read below are already
-- combined per (host, key).
OPTIMIZE TABLE metrics_mv FINAL;

-- Explicit column list and ORDER BY keep the printed row order stable.
SELECT
    host,
    key,
    u_values
FROM metrics_mv
ORDER BY
    host,
    key;
┌─host─┬─key─┬─u_values─────┐
│ h1 │ k1 │ ['v11','v1'] │
│ h1 │ k2 │ ['v2'] │
│ h1 │ k3 │ ['v2'] │
│ h2 │ k1 │ ['v1'] │
│ h2 │ k3 │ ['v22'] │
└──────┴─────┴──────────────┘
-- Read-time aggregation: unions the stored u_values arrays of every row
-- that shares the same (host, key) into one array of distinct values.
SELECT
    host,
    key,
    groupUniqArrayArray(u_values) AS values
FROM metrics_mv
GROUP BY
    host,
    key
┌─host─┬─key─┬─values───────┐
│ h1 │ k2 │ ['v2'] │
│ h2 │ k3 │ ['v22'] │
│ h1 │ k3 │ ['v2'] │
│ h1 │ k1 │ ['v11','v1'] │
│ h2 │ k1 │ ['v1'] │
└──────┴─────┴──────────────┘
表结构如下:
-- Source table from the question: one row per metric sample, with the
-- tags stored as a Nested structure of parallel key/value arrays.
-- The engine is declared with the legacy positional form
-- MergeTree(date_column, (primary_key), index_granularity).
CREATE TABLE metrics
(
    created_date Date,
    created_at   DateTime,
    host         LowCardinality(String),
    metrics_name LowCardinality(String),
    tags         Nested(
                     key   LowCardinality(String),
                     value LowCardinality(String)
                 )
)
ENGINE = MergeTree(created_date, (created_at), 8192)
如何生成结构如下的物化视图?

| Host | Key | Values |
|---|---|---|
| host | Key: X | Values : ["uniq1", "uniq2"] |
考虑使用这个 MV:
-- Materialized view over metrics: unfolds the Nested tags with ARRAY JOIN,
-- groups by host and tag key, and gathers the tag values into one array.
CREATE MATERIALIZED VIEW metrics_mv
ENGINE = MergeTree()
ORDER BY host
AS
SELECT
    host,
    tags.key AS key,
    -- use groupUniqArray(tags.value) instead to keep only distinct values
    groupArray(tags.value) AS values
FROM metrics
ARRAY JOIN tags
GROUP BY
    host,
    tags.key
MergeTree(created_date, (created_at), 8192)
这种语法早在三年多以前就已被弃用,请尽量不要再使用,它最终会被移除。建议改用如下写法:
ENGINE = MergeTree partition by toYYYYMM(created_date) ORDER by (created_at)
-- End-to-end example: source table, aggregating materialized view, sample
-- data, and two ways of reading the distinct tag values per (host, key).
-- Result sets from the original answer are preserved as comments.

-- Source table declared with the current MergeTree syntax: monthly
-- partitions derived from created_date, rows sorted by created_at.
CREATE TABLE metrics
(
    `created_date` Date,
    `created_at` DateTime,
    `host` LowCardinality(String),
    `metrics_name` LowCardinality(String),
    `tags` Nested(key LowCardinality(String), value LowCardinality(String))
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(created_date)
ORDER BY (created_at);

-- View keyed by (host, key). u_values is a SimpleAggregateFunction column,
-- so rows sharing a key are combined with groupUniqArrayArray on merge.
CREATE MATERIALIZED VIEW metrics_mv
(
    host LowCardinality(String),
    key LowCardinality(String),
    u_values SimpleAggregateFunction(groupUniqArrayArray, Array(String))
)
ENGINE = AggregatingMergeTree
ORDER BY (host, key)
AS
SELECT
    host,
    tags.key AS key,
    groupUniqArray(tags.value) AS u_values
FROM metrics
ARRAY JOIN tags  -- one row per (key, value) pair of the Nested column
GROUP BY
    host,
    key;

-- Sample data, kept as four separate INSERT statements. The Nested column
-- is written through its component arrays tags.key and tags.value.
INSERT INTO metrics (created_date, created_at, host, metrics_name, tags.key, tags.value)
VALUES (today(), now(), 'h1', 'm1', ['k1','k2'], ['v1', 'v2']);
INSERT INTO metrics (created_date, created_at, host, metrics_name, tags.key, tags.value)
VALUES (today(), now(), 'h1', 'm1', ['k1','k2'], ['v1', 'v2']);
INSERT INTO metrics (created_date, created_at, host, metrics_name, tags.key, tags.value)
VALUES (today(), now(), 'h1', 'm1', ['k1','k3'], ['v11', 'v2']);
INSERT INTO metrics (created_date, created_at, host, metrics_name, tags.key, tags.value)
VALUES (today(), now(), 'h2', 'm1', ['k1','k3'], ['v1', 'v22']);

-- Force a merge of the view's parts so the rows read below are already
-- combined per (host, key).
OPTIMIZE TABLE metrics_mv FINAL;

-- Explicit column list and ORDER BY keep the printed row order stable.
SELECT
    host,
    key,
    u_values
FROM metrics_mv
ORDER BY
    host,
    key;
-- ┌─host─┬─key─┬─u_values─────┐
-- │ h1 │ k1 │ ['v11','v1'] │
-- │ h1 │ k2 │ ['v2'] │
-- │ h1 │ k3 │ ['v2'] │
-- │ h2 │ k1 │ ['v1'] │
-- │ h2 │ k3 │ ['v22'] │
-- └──────┴─────┴──────────────┘

-- Read-time aggregation: unions the stored u_values arrays of every row
-- that shares the same (host, key). No ORDER BY, so row order is arbitrary.
SELECT
    host,
    key,
    groupUniqArrayArray(u_values) AS values
FROM metrics_mv
GROUP BY
    host,
    key;
-- ┌─host─┬─key─┬─values───────┐
-- │ h1 │ k2 │ ['v2'] │
-- │ h2 │ k3 │ ['v22'] │
-- │ h1 │ k3 │ ['v2'] │
-- │ h1 │ k1 │ ['v11','v1'] │
-- │ h2 │ k1 │ ['v1'] │
-- └──────┴─────┴──────────────┘