在简单的 postgres 和 timescaleDB 查询性能测试中没有看到任何提升?
Didn't see any boost in a simple postgres and timescaleDB query performance test?
我尝试了 postgres 和 timescaleDB 之间的简单性能测试。
以下是我的结果:
总行数 403,204
使用 Postgres
聚合查询 176 行的获取时间:203 毫秒 - 240 毫秒
连接查询 102 行的获取时间:660 毫秒 - 720 毫秒
使用TimescaleDB
聚合查询 176 行的获取时间:175 毫秒 - 200 毫秒
连接查询 102 行的获取时间:614 毫秒 - 650 毫秒
-- Sensor catalogue: one row per sensor, keyed by an auto-incrementing id.
CREATE TABLE public.sensors (
    id       SERIAL,       -- surrogate key, filled from an implicit sequence
    type     VARCHAR(50),  -- sensor category label (nullable)
    location VARCHAR(50),  -- placement label (nullable)
    PRIMARY KEY (id)
);
-- Plain PostgreSQL table: the non-hypertable side of the comparison.
-- One row per (time, sensor) reading; sensor_id points at sensors.id.
CREATE TABLE sensor_data (
    time        TIMESTAMPTZ      NOT NULL,
    sensor_id   INTEGER          REFERENCES sensors (id),
    temperature DOUBLE PRECISION,
    cpu         DOUBLE PRECISION
);
-- Cleanup helper, intentionally left disabled:
-- DROP TABLE public.sensor_data;
-- TimescaleDB table: same columns as sensor_data, then turned into a
-- hypertable partitioned on the "time" column.
CREATE TABLE sensor_data_ts (
    time        TIMESTAMPTZ      NOT NULL,
    sensor_id   INTEGER          REFERENCES sensors (id),
    temperature DOUBLE PRECISION,
    cpu         DOUBLE PRECISION
);

SELECT create_hypertable('sensor_data_ts', 'time');
-- Seed data: four sensors, i.e. types 'a' and 'b' at 'floor' and 'ceiling'.
-- Ids are assigned by the SERIAL column in the order the rows are listed.
INSERT INTO sensors (type, location)
VALUES
    ('a', 'floor'),
    ('a', 'ceiling'),
    ('b', 'floor'),
    ('b', 'ceiling');
-- Postgres: fill the plain table with synthetic readings, one row per
-- sensor id (1..4) for every 5-minute tick over the last 50 weeks.
-- cpu is a random value from random(); temperature is random() scaled by 100.
INSERT INTO sensor_data (time, sensor_id, cpu, temperature)
SELECT
    ticks.time,
    ids.sensor_id,
    random()       AS cpu,
    random() * 100 AS temperature
FROM generate_series(now() - INTERVAL '50 week', now(), INTERVAL '5 minute') AS ticks (time)
CROSS JOIN generate_series(1, 4, 1) AS ids (sensor_id);
-- TimescaleDB: fill the hypertable with synthetic readings, one row per
-- sensor id (1..4) for every 5-minute tick over the last 50 weeks.
-- Values are drawn from random() independently of the plain-table load,
-- so the two tables have the same shape but not the same values.
INSERT INTO sensor_data_ts (time, sensor_id, cpu, temperature)
SELECT
    ticks.time,
    ids.sensor_id,
    random()       AS cpu,
    random() * 100 AS temperature
FROM generate_series(now() - INTERVAL '50 week', now(), INTERVAL '5 minute') AS ticks (time)
CROSS JOIN generate_series(1, 4, 1) AS ids (sensor_id);
-- Reset helpers, intentionally left disabled:
-- TRUNCATE TABLE public.sensor_data;
-- TRUNCATE TABLE public.sensor_data_ts;

-- Sanity check: report the row count of each table.
SELECT count(*) FROM public.sensor_data AS sd;
SELECT count(*) FROM public.sensor_data_ts AS sd;
-- Postgres
-- Aggregate query: average temperature and CPU per 2-day period.
-- The period is the epoch timestamp divided by the number of seconds in
-- two days (60 * 60 * 24 * 2), rounded down.
SELECT
    floor(extract(epoch FROM sd."time") / (60 * 60 * 24 * 2)) AS period,
    avg(sd.temperature) AS avg_temp,
    avg(sd.cpu)         AS avg_cpu
FROM sensor_data AS sd
GROUP BY period;
-- Ordering is intentionally left disabled:
-- ORDER BY period;
-- Join query: per sensor location and 7-day period, the average temperature,
-- the most recent temperature, and the average CPU.
-- NOTE(review): last(value, time) is documented as a TimescaleDB aggregate,
-- not a built-in PostgreSQL one, so this "Postgres" query presumably still
-- needs the extension installed even though it reads the plain table.
SELECT
    s.location,
    floor(extract(epoch FROM sd."time") / (60 * 60 * 24 * 7)) AS period,
    avg(sd.temperature)             AS avg_temp,
    last(sd.temperature, sd."time") AS last_temp,
    avg(sd.cpu)                     AS avg_cpu
FROM sensor_data AS sd
INNER JOIN sensors AS s
    ON sd.sensor_id = s.id
GROUP BY period, s.location;
-- TimescaleDB
-- Aggregate query: average temperature and CPU per 2-day time_bucket,
-- read from the hypertable.
SELECT
    time_bucket('2 day', sd."time") AS period,
    avg(sd.temperature) AS avg_temp,
    avg(sd.cpu)         AS avg_cpu
FROM sensor_data_ts AS sd
GROUP BY period;
-- Ordering is intentionally left disabled:
-- ORDER BY period;
-- Join query (TimescaleDB): per sensor location and 1-week time_bucket, the
-- average temperature, the most recent temperature, and the average CPU.
-- This must read the hypertable (sensor_data_ts), not the plain sensor_data
-- table; otherwise the timing measures PostgreSQL rather than TimescaleDB.
SELECT
    s.location,
    time_bucket('1 week', sd."time") AS period,
    avg(sd.temperature)             AS avg_temp,
    last(sd.temperature, sd."time") AS last_temp,
    avg(sd.cpu)                     AS avg_cpu
FROM sensor_data_ts AS sd
INNER JOIN sensors AS s
    ON sd.sensor_id = s.id
GROUP BY period, s.location;
我期待查询性能有明显的提升。
我还能做些什么来提高查询性能?
几件事:
- 我有点困惑。time_bucket 是 TimescaleDB 的函数,而不是 Postgres 的函数,所以它运行的很可能是我们(TimescaleDB)的一部分代码。
- 您仍然在对全部数据做全表扫描,这里没有多少可优化的余地。而且数据集很小(40 万行),可以全部放进缓冲区缓存;如果您想看出插入/查询性能上的差异,可能需要 (a) 更多的数据,(b) 更复杂的查询类型。
- 不过 TimescaleDB 还有其他特性。例如,开启压缩后,您可能会发现这些“全表扫描”变得更快(不过这主要是在工作负载受磁盘限制(disk-bound)之后才会体现)。或者开启连续聚合(continuous aggregates),这样就可以持续/增量地物化这些结果,用来为面向用户的仪表板等场景提供数据。
我在 Postgres 和 TimescaleDB 之间做了一个简单的性能对比测试。以下是我的结果:
总行数 403,204
使用 Postgres
聚合查询 176 行的获取时间:203 毫秒 - 240 毫秒
连接查询 102 行的获取时间:660 毫秒 - 720 毫秒
使用TimescaleDB
聚合查询 176 行的获取时间:175 毫秒 - 200 毫秒
连接查询 102 行的获取时间:614 毫秒 - 650 毫秒
-- Sensor catalogue: one row per sensor, keyed by an auto-incrementing id.
CREATE TABLE public.sensors (
    id       SERIAL,       -- surrogate key, filled from an implicit sequence
    type     VARCHAR(50),  -- sensor category label (nullable)
    location VARCHAR(50),  -- placement label (nullable)
    PRIMARY KEY (id)
);
-- Plain PostgreSQL table: the non-hypertable side of the comparison.
-- One row per (time, sensor) reading; sensor_id points at sensors.id.
CREATE TABLE sensor_data (
    time        TIMESTAMPTZ      NOT NULL,
    sensor_id   INTEGER          REFERENCES sensors (id),
    temperature DOUBLE PRECISION,
    cpu         DOUBLE PRECISION
);
-- Cleanup helper, intentionally left disabled:
-- DROP TABLE public.sensor_data;
-- TimescaleDB table: same columns as sensor_data, then turned into a
-- hypertable partitioned on the "time" column.
CREATE TABLE sensor_data_ts (
    time        TIMESTAMPTZ      NOT NULL,
    sensor_id   INTEGER          REFERENCES sensors (id),
    temperature DOUBLE PRECISION,
    cpu         DOUBLE PRECISION
);

SELECT create_hypertable('sensor_data_ts', 'time');
-- Seed data: four sensors, i.e. types 'a' and 'b' at 'floor' and 'ceiling'.
-- Ids are assigned by the SERIAL column in the order the rows are listed.
INSERT INTO sensors (type, location)
VALUES
    ('a', 'floor'),
    ('a', 'ceiling'),
    ('b', 'floor'),
    ('b', 'ceiling');
-- Postgres: fill the plain table with synthetic readings, one row per
-- sensor id (1..4) for every 5-minute tick over the last 50 weeks.
-- cpu is a random value from random(); temperature is random() scaled by 100.
INSERT INTO sensor_data (time, sensor_id, cpu, temperature)
SELECT
    ticks.time,
    ids.sensor_id,
    random()       AS cpu,
    random() * 100 AS temperature
FROM generate_series(now() - INTERVAL '50 week', now(), INTERVAL '5 minute') AS ticks (time)
CROSS JOIN generate_series(1, 4, 1) AS ids (sensor_id);
-- TimescaleDB: fill the hypertable with synthetic readings, one row per
-- sensor id (1..4) for every 5-minute tick over the last 50 weeks.
-- Values are drawn from random() independently of the plain-table load,
-- so the two tables have the same shape but not the same values.
INSERT INTO sensor_data_ts (time, sensor_id, cpu, temperature)
SELECT
    ticks.time,
    ids.sensor_id,
    random()       AS cpu,
    random() * 100 AS temperature
FROM generate_series(now() - INTERVAL '50 week', now(), INTERVAL '5 minute') AS ticks (time)
CROSS JOIN generate_series(1, 4, 1) AS ids (sensor_id);
-- Reset helpers, intentionally left disabled:
-- TRUNCATE TABLE public.sensor_data;
-- TRUNCATE TABLE public.sensor_data_ts;

-- Sanity check: report the row count of each table.
SELECT count(*) FROM public.sensor_data AS sd;
SELECT count(*) FROM public.sensor_data_ts AS sd;
-- Postgres
-- Aggregate query: average temperature and CPU per 2-day period.
-- The period is the epoch timestamp divided by the number of seconds in
-- two days (60 * 60 * 24 * 2), rounded down.
SELECT
    floor(extract(epoch FROM sd."time") / (60 * 60 * 24 * 2)) AS period,
    avg(sd.temperature) AS avg_temp,
    avg(sd.cpu)         AS avg_cpu
FROM sensor_data AS sd
GROUP BY period;
-- Ordering is intentionally left disabled:
-- ORDER BY period;
-- Join query: per sensor location and 7-day period, the average temperature,
-- the most recent temperature, and the average CPU.
-- NOTE(review): last(value, time) is documented as a TimescaleDB aggregate,
-- not a built-in PostgreSQL one, so this "Postgres" query presumably still
-- needs the extension installed even though it reads the plain table.
SELECT
    s.location,
    floor(extract(epoch FROM sd."time") / (60 * 60 * 24 * 7)) AS period,
    avg(sd.temperature)             AS avg_temp,
    last(sd.temperature, sd."time") AS last_temp,
    avg(sd.cpu)                     AS avg_cpu
FROM sensor_data AS sd
INNER JOIN sensors AS s
    ON sd.sensor_id = s.id
GROUP BY period, s.location;
-- TimescaleDB
-- Aggregate query: average temperature and CPU per 2-day time_bucket,
-- read from the hypertable.
SELECT
    time_bucket('2 day', sd."time") AS period,
    avg(sd.temperature) AS avg_temp,
    avg(sd.cpu)         AS avg_cpu
FROM sensor_data_ts AS sd
GROUP BY period;
-- Ordering is intentionally left disabled:
-- ORDER BY period;
-- Join query (TimescaleDB): per sensor location and 1-week time_bucket, the
-- average temperature, the most recent temperature, and the average CPU.
-- This must read the hypertable (sensor_data_ts), not the plain sensor_data
-- table; otherwise the timing measures PostgreSQL rather than TimescaleDB.
SELECT
    s.location,
    time_bucket('1 week', sd."time") AS period,
    avg(sd.temperature)             AS avg_temp,
    last(sd.temperature, sd."time") AS last_temp,
    avg(sd.cpu)                     AS avg_cpu
FROM sensor_data_ts AS sd
INNER JOIN sensors AS s
    ON sd.sensor_id = s.id
GROUP BY period, s.location;
我期待查询性能有明显的提升。 我还能做些什么来提高查询性能?
几件事:
- 我有点困惑。time_bucket 是 TimescaleDB 的函数,而不是 Postgres 的函数,所以它运行的很可能是我们(TimescaleDB)的一部分代码。
- 您仍然在对全部数据做全表扫描,这里没有多少可优化的余地。而且数据集很小(40 万行),可以全部放进缓冲区缓存;如果您想看出插入/查询性能上的差异,可能需要 (a) 更多的数据,(b) 更复杂的查询类型。
- 不过 TimescaleDB 还有其他特性。例如,开启压缩后,您可能会发现这些“全表扫描”变得更快(不过这主要是在工作负载受磁盘限制(disk-bound)之后才会体现)。或者开启连续聚合(continuous aggregates),这样就可以持续/增量地物化这些结果,用来为面向用户的仪表板等场景提供数据。