为什么 Clickhouse 比 PostgreSQL 慢?
Why is Clickhouse slower than PostgreSQL?
我想将 Clickhouse 用作 OLAP 数据库,将 PostgreSQL 用作 OLTP 数据库。
问题是,同一个查询在 Clickhouse 上运行得比在 Postgres 上慢。查询如下:
select count(id) from {table_name}
这是我的表结构:
CREATE TABLE IF NOT EXISTS {table_name}
(
`id` UInt64,
`label` Nullable(FixedString(50)),
`query` Nullable(text),
`creation_datetime` DateTime,
`offset` UInt64,
`user_is_first_search` UInt8,
`user_date_of_start` Date,
`usage_type` Nullable(FixedString(20)),
`user_ip` Nullable(FixedString(200)),
`who_searched_query` Nullable(FixedString(15)),
`device_type` Nullable(FixedString(20)),
`device_os` Nullable(FixedString(20)),
`tab_type` Nullable(FixedString(20)),
`response_api_type` Nullable(FixedString(20)),
`total_response_time` Float64,
`retrieved_instant_answer` Nullable(FixedString(100)),
`is_relative_instant_answer` UInt8,
`meta_search_instant_answer_type` Nullable(FixedString(50)),
`settings_alignment` Nullable(FixedString(20)),
`settings_safe_search` Nullable(FixedString(30)),
`settings_search_results_number` Nullable(FixedString(30)),
`settings_proxy_image_urls` Nullable(FixedString(30)),
`cache_hit` Nullable(FixedString(20)),
`net_status` Nullable(FixedString(20)),
`is_transitional` UInt8
)
ENGINE = MergeTree() PARTITION BY creation_datetime ORDER BY (id)
我在两个数据库的日期时间字段上都创建了索引,并且在两个数据库上都运行了 optimize。
谁能告诉我为什么 Clickhouse 比 Postgres 慢?
Clickhouse 有很多地方会让你大吃一惊,例如:
create table test ( id Int64, d Date ) Engine=MergeTree Order by id;
insert into test select number, today() from numbers(1e8);
select count() from test;
┌───count()─┐
│ 100000000 │
└───────────┘
1 rows in set. Elapsed: 0.002 sec.
select count(id) from test;
┌─count(id)─┐
│ 100000000 │
└───────────┘
1 rows in set. Elapsed: 0.239 sec. Processed 100.00 million rows, 800.00 MB (418.46 million rows/s., 3.35 GB/s.)
drop table test;
create table test ( id Int64, d Int64 ) Engine=MergeTree partition by (intDiv(d, 10000)) Order by id;
set max_partitions_per_insert_block=0;
insert into test select number, number from numbers(1e8);
select count(id) from test;
┌─count(id)─┐
│ 100000000 │
└───────────┘
1 rows in set. Elapsed: 1.050 sec. Processed 100.00 million rows, 800.00 MB (95.20 million rows/s., 761.61 MB/s.)
select count(d) from test;
┌──count(d)─┐
│ 100000000 │
└───────────┘
1 rows in set. Elapsed: 0.004 sec.
我终于找到自己做错的地方了:我不应该按日期时间字段进行分区。我重新创建了不带分区的表,查询就变得非常快了。
我想将 Clickhouse 用作 OLAP,将 PostgreSQL 用作 OLTP 数据库。
问题是,同一个查询在 Clickhouse 上运行得比在 Postgres 上慢。查询如下:
select count(id) from {table_name}
这是我的表结构:
CREATE TABLE IF NOT EXISTS {table_name}
(
`id` UInt64,
`label` Nullable(FixedString(50)),
`query` Nullable(text),
`creation_datetime` DateTime,
`offset` UInt64,
`user_is_first_search` UInt8,
`user_date_of_start` Date,
`usage_type` Nullable(FixedString(20)),
`user_ip` Nullable(FixedString(200)),
`who_searched_query` Nullable(FixedString(15)),
`device_type` Nullable(FixedString(20)),
`device_os` Nullable(FixedString(20)),
`tab_type` Nullable(FixedString(20)),
`response_api_type` Nullable(FixedString(20)),
`total_response_time` Float64,
`retrieved_instant_answer` Nullable(FixedString(100)),
`is_relative_instant_answer` UInt8,
`meta_search_instant_answer_type` Nullable(FixedString(50)),
`settings_alignment` Nullable(FixedString(20)),
`settings_safe_search` Nullable(FixedString(30)),
`settings_search_results_number` Nullable(FixedString(30)),
`settings_proxy_image_urls` Nullable(FixedString(30)),
`cache_hit` Nullable(FixedString(20)),
`net_status` Nullable(FixedString(20)),
`is_transitional` UInt8
)
ENGINE = MergeTree() PARTITION BY creation_datetime ORDER BY (id)
我在两个数据库的日期时间字段上都创建了索引,并且在两个数据库上都运行了 optimize。
谁能告诉我为什么 Clickhouse 比 Postgres 慢?
Clickhouse 有很多地方会让你大吃一惊,例如:
create table test ( id Int64, d Date ) Engine=MergeTree Order by id;
insert into test select number, today() from numbers(1e8);
select count() from test;
┌───count()─┐
│ 100000000 │
└───────────┘
1 rows in set. Elapsed: 0.002 sec.
select count(id) from test;
┌─count(id)─┐
│ 100000000 │
└───────────┘
1 rows in set. Elapsed: 0.239 sec. Processed 100.00 million rows, 800.00 MB (418.46 million rows/s., 3.35 GB/s.)
drop table test;
create table test ( id Int64, d Int64 ) Engine=MergeTree partition by (intDiv(d, 10000)) Order by id;
set max_partitions_per_insert_block=0;
insert into test select number, number from numbers(1e8);
select count(id) from test;
┌─count(id)─┐
│ 100000000 │
└───────────┘
1 rows in set. Elapsed: 1.050 sec. Processed 100.00 million rows, 800.00 MB (95.20 million rows/s., 761.61 MB/s.)
select count(d) from test;
┌──count(d)─┐
│ 100000000 │
└───────────┘
1 rows in set. Elapsed: 0.004 sec.
我终于找到自己做错的地方了:我不应该按日期时间字段进行分区。我重新创建了不带分区的表,查询就变得非常快了。