PostgreSQL 中一个简单却很慢的 SQL 查询
Simple yet slow SQL query with PostgreSQL
我有两个表需要连接(JOIN):
-- Orders table, keyed by a UUID. close_time is the only nullable column.
CREATE TABLE public."order" (
    id         uuid          NOT NULL,
    side       varchar(4)    NOT NULL,
    product    varchar(7)    NOT NULL,
    price      numeric(18,8) NOT NULL,
    close_time timestamp     NULL,
    CONSTRAINT order_pkey PRIMARY KEY (id)
);

-- History rows; each one points at its order through order_id.
CREATE TABLE public.order_history (
    id       serial        NOT NULL,
    amount   numeric(18,8) NOT NULL,
    "time"   timestamp     NOT NULL,
    order_id uuid          NOT NULL,
    CONSTRAINT order_history_pkey PRIMARY KEY (id),
    CONSTRAINT order_history_order_id_fkey
        FOREIGN KEY (order_id) REFERENCES "order" (id)
);

-- Single-column btree indexes: one on the foreign key, one on the timestamp.
CREATE INDEX order_history_order_id
    ON public.order_history USING btree (order_id);
CREATE INDEX order_history_time_idx
    ON public.order_history USING btree ("time");
我的查询非常简单,但在我的 HDD 上却要花好几分钟(我的一个朋友把同一个数据库放在 SSD 上,速度明显更快,但仍然超出了我愿意等待的合理时间):
-- Joins each order_history row with "time" <= 2018-03-28 08:00:00 to its order,
-- keeping only 'BTC-USD' orders whose close_time is later than
-- 2018-03-28 07:00:00 or is NULL; the result is sorted by the history timestamp.
select
    t1.id,
    t1.side,
    t1.price,
    t1.close_time,
    t2."time",
    t2.amount
from "order" as t1
inner join order_history as t2
    on t2.order_id = t1.id
where t1.product = 'BTC-USD'
  and t2."time" <= '2018-03-28 08:00:00'
  and (
        t1.close_time > '2018-03-28 07:00:00'
        or t1.close_time is null
      )
order by t2."time"
这是 EXPLAIN (ANALYZE, BUFFERS) 的输出:
Gather Merge (cost=3293333.15..3673129.97 rows=3255174 width=47) (actual time=195630.667..195668.246 rows=83766 loops=1)
Workers Planned: 2
Workers Launched: 2
Buffers: shared hit=346185 read=948128, temp read=402275 written=402089
-> Sort (cost=3292333.13..3296402.10 rows=1627587 width=47) (actual time=193748.573..193751.027 rows=27922 loops=3)
Sort Key: t2."time"
Sort Method: quicksort Memory: 4853kB
Buffers: shared hit=346185 read=948128, temp read=402275 written=402089
-> Hash Join (cost=1315861.90..3074345.01 rows=1627587 width=47) (actual time=65363.240..193703.738 rows=27922 loops=3)
Hash Cond: (t1.id = t2.order_id)
Buffers: shared hit=346172 read=948127, temp read=402275 written=402089
-> Parallel Seq Scan on "order" t1 (cost=0.00..1293501.00 rows=11021971 width=34) (actual time=0.122..78296.478 rows=8629896 loops=3)
Filter: (((close_time > '2018-03-28 07:00:00'::timestamp without time zone) OR (close_time IS NULL)) AND ((product)::text = 'BTC-USD'::text))
Rows Removed by Filter: 19019229
Buffers: shared hit=13 read=775079
-> Hash (cost=1079028.57..1079028.57 rows=12248346 width=29) (actual time=65107.372..65107.372 rows=12358141 loops=3)
Buckets: 524288 Batches: 32 Memory Usage: 27473kB
Buffers: shared hit=346071 read=173036, temp written=218295
-> Bitmap Heap Scan on order_history t2 (cost=229265.25..1079028.57 rows=12248346 width=29) (actual time=2951.352..61701.142 rows=12358141 loops=3)
Recheck Cond: ("time" <= '2018-03-28 08:00:00'::timestamp without time zone)
Heap Blocks: exact=139266
Buffers: shared hit=346071 read=173036
-> Bitmap Index Scan on order_history_time_idx (cost=0.00..226203.16 rows=12248346 width=0) (actual time=2925.500..2925.500 rows=12358141 loops=3)
Index Cond: ("time" <= '2018-03-28 08:00:00'::timestamp without time zone)
Buffers: shared hit=67539 read=33770
Planning time: 0.444 ms
Execution time: 195672.969 ms
我不知道为什么这个简单的查询这么慢,我只是设法通过在 order_history.time 上创建索引来稍微加快它的速度,仅此而已。欢迎任何建议!
您的查询需要处理大量数据,因此速度慢是意料之中的。
对 "order" 表进行顺序扫描是最快的方法,因为您需要该表中大约一半的行。
总而言之,PostgreSQL 需要读取超过一百万个块来计算结果,并处理所有这些数据,所以三分钟并不算差。
我不认为这个查询还能快多少。
- `time` 作为(时间戳!)列的名称并不合适,请避免使用
- `order` 作为表名也不合适(它是 SQL 保留字),请避免使用
- `close_time` 上缺少索引
- 尽量避免在时间戳(近似键字段)列中使用 NULL,避免 `... OR xxx IS NULL`
- 相反,您可以使用合理的默认值,例如 `now()` 或 `+/-infinity`
- 最后的排序步骤很可能会毁掉原本理想的执行计划。

此外:您可能不需要 `order_history` 上的代理键 `id`。自然键 `(order_id, ztime)` 就足够了。
\i tmp.sql
-- Reworked schema: tables are named orders / order_history, the history
-- timestamp column is ztime, and both timestamp columns are NOT NULL with
-- an infinity default instead of allowing NULL.
CREATE TABLE orders (
    id         uuid          NOT NULL,
    side       varchar(4)    NOT NULL,
    product    varchar(7)    NOT NULL,
    price      numeric(18,8) NOT NULL,
    close_time timestamp     NOT NULL DEFAULT 'infinity'::timestamp,
    CONSTRAINT order_pkey PRIMARY KEY (id)
);

CREATE TABLE order_history (
    id       serial        NOT NULL,
    amount   numeric(18,8) NOT NULL,
    ztime    timestamp     NOT NULL DEFAULT '-infinity'::timestamp,
    order_id uuid          NOT NULL,
    CONSTRAINT order_history_pkey PRIMARY KEY (id),
    CONSTRAINT order_history_order_id_fkey
        FOREIGN KEY (order_id) REFERENCES orders (id)
);

-- The two single-column indexes are left disabled; the composite
-- (order_id, ztime) index below is created instead.
-- CREATE INDEX order_history_order_id ON order_history USING btree (order_id);
-- CREATE INDEX order_history_time_idx ON order_history USING btree (ztime);
CREATE INDEX order_history_order_id_ztime
    ON order_history USING btree (order_id, ztime);

-- NOTE(review): despite its name, this index is on orders.close_time.
CREATE INDEX order_h_ztime
    ON orders USING btree (close_time);
-- Plan-only EXPLAIN (no ANALYZE) of the join against orders / order_history.
EXPLAIN
SELECT
    oo.id,
    oo.side,
    oo.price,
    oo.close_time,
    oh.ztime,
    oh.amount
FROM orders AS oo
INNER JOIN order_history AS oh
    ON oh.order_id = oo.id
WHERE oo.product = 'BTC-USD'
  -- No "OR oo.close_time IS NULL" branch: orders.close_time is declared
  -- NOT NULL with an 'infinity' default in the schema above.
  AND oo.close_time > '2018-03-28 07:00:00'
  AND oh.ztime <= '2018-03-28 08:00:00'
ORDER BY oh.ztime
;
在空表(没有任何数据!)上生成的执行计划:
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------
Sort (cost=16.98..16.99 rows=1 width=92)
Sort Key: oh.ztime
-> Nested Loop (cost=3.14..16.98 rows=1 width=92)
-> Bitmap Heap Scan on orders oo (cost=1.94..13.64 rows=1 width=64)
Recheck Cond: (close_time > '2018-03-28 07:00:00'::timestamp without time zone)
Filter: ((product)::text = 'BTC-USD'::text)
-> Bitmap Index Scan on order_h_ztime (cost=0.00..1.94 rows=213 width=0)
Index Cond: (close_time > '2018-03-28 07:00:00'::timestamp without time zone)
-> Bitmap Heap Scan on order_history oh (cost=1.20..3.33 rows=2 width=44)
Recheck Cond: ((order_id = oo.id) AND (ztime <= '2018-03-28 08:00:00'::timestamp without time zone))
-> Bitmap Index Scan on order_history_order_id_ztime (cost=0.00..1.20 rows=2 width=0)
Index Cond: ((order_id = oo.id) AND (ztime <= '2018-03-28 08:00:00'::timestamp without time zone))
(12 rows)
我有两个表需要连接(JOIN):
-- Orders table, keyed by a UUID. close_time is the only nullable column.
CREATE TABLE public."order" (
    id         uuid          NOT NULL,
    side       varchar(4)    NOT NULL,
    product    varchar(7)    NOT NULL,
    price      numeric(18,8) NOT NULL,
    close_time timestamp     NULL,
    CONSTRAINT order_pkey PRIMARY KEY (id)
);

-- History rows; each one points at its order through order_id.
CREATE TABLE public.order_history (
    id       serial        NOT NULL,
    amount   numeric(18,8) NOT NULL,
    "time"   timestamp     NOT NULL,
    order_id uuid          NOT NULL,
    CONSTRAINT order_history_pkey PRIMARY KEY (id),
    CONSTRAINT order_history_order_id_fkey
        FOREIGN KEY (order_id) REFERENCES "order" (id)
);

-- Single-column btree indexes: one on the foreign key, one on the timestamp.
CREATE INDEX order_history_order_id
    ON public.order_history USING btree (order_id);
CREATE INDEX order_history_time_idx
    ON public.order_history USING btree ("time");
我的查询非常简单,但在我的 HDD 上却要花好几分钟(我的一个朋友把同一个数据库放在 SSD 上,速度明显更快,但仍然超出了我愿意等待的合理时间):
-- Joins each order_history row with "time" <= 2018-03-28 08:00:00 to its order,
-- keeping only 'BTC-USD' orders whose close_time is later than
-- 2018-03-28 07:00:00 or is NULL; the result is sorted by the history timestamp.
select
    t1.id,
    t1.side,
    t1.price,
    t1.close_time,
    t2."time",
    t2.amount
from "order" as t1
inner join order_history as t2
    on t2.order_id = t1.id
where t1.product = 'BTC-USD'
  and t2."time" <= '2018-03-28 08:00:00'
  and (
        t1.close_time > '2018-03-28 07:00:00'
        or t1.close_time is null
      )
order by t2."time"
这是 EXPLAIN (ANALYZE, BUFFERS) 的输出:
Gather Merge (cost=3293333.15..3673129.97 rows=3255174 width=47) (actual time=195630.667..195668.246 rows=83766 loops=1)
Workers Planned: 2
Workers Launched: 2
Buffers: shared hit=346185 read=948128, temp read=402275 written=402089
-> Sort (cost=3292333.13..3296402.10 rows=1627587 width=47) (actual time=193748.573..193751.027 rows=27922 loops=3)
Sort Key: t2."time"
Sort Method: quicksort Memory: 4853kB
Buffers: shared hit=346185 read=948128, temp read=402275 written=402089
-> Hash Join (cost=1315861.90..3074345.01 rows=1627587 width=47) (actual time=65363.240..193703.738 rows=27922 loops=3)
Hash Cond: (t1.id = t2.order_id)
Buffers: shared hit=346172 read=948127, temp read=402275 written=402089
-> Parallel Seq Scan on "order" t1 (cost=0.00..1293501.00 rows=11021971 width=34) (actual time=0.122..78296.478 rows=8629896 loops=3)
Filter: (((close_time > '2018-03-28 07:00:00'::timestamp without time zone) OR (close_time IS NULL)) AND ((product)::text = 'BTC-USD'::text))
Rows Removed by Filter: 19019229
Buffers: shared hit=13 read=775079
-> Hash (cost=1079028.57..1079028.57 rows=12248346 width=29) (actual time=65107.372..65107.372 rows=12358141 loops=3)
Buckets: 524288 Batches: 32 Memory Usage: 27473kB
Buffers: shared hit=346071 read=173036, temp written=218295
-> Bitmap Heap Scan on order_history t2 (cost=229265.25..1079028.57 rows=12248346 width=29) (actual time=2951.352..61701.142 rows=12358141 loops=3)
Recheck Cond: ("time" <= '2018-03-28 08:00:00'::timestamp without time zone)
Heap Blocks: exact=139266
Buffers: shared hit=346071 read=173036
-> Bitmap Index Scan on order_history_time_idx (cost=0.00..226203.16 rows=12248346 width=0) (actual time=2925.500..2925.500 rows=12358141 loops=3)
Index Cond: ("time" <= '2018-03-28 08:00:00'::timestamp without time zone)
Buffers: shared hit=67539 read=33770
Planning time: 0.444 ms
Execution time: 195672.969 ms
我不知道为什么这个简单的查询这么慢,我只是设法通过在 order_history.time 上创建索引来稍微加快它的速度,仅此而已。欢迎任何建议!
您的查询需要处理大量数据,因此速度慢是意料之中的。
对 "order" 表进行顺序扫描是最快的方法,因为您需要该表中大约一半的行。
总而言之,PostgreSQL 需要读取超过一百万个块来计算结果,并处理所有这些数据,所以三分钟并不算差。
我不认为这个查询还能快多少。
- `time` 作为(时间戳!)列的名称并不合适,请避免使用
- `order` 作为表名也不合适(它是 SQL 保留字),请避免使用
- `close_time` 上缺少索引
- 尽量避免在时间戳(近似键字段)列中使用 NULL,避免 `... OR xxx IS NULL`
- 相反,您可以使用合理的默认值,例如 `now()` 或 `+/-infinity`
- 最后的排序步骤很可能会毁掉原本理想的执行计划。

此外:您可能不需要 `order_history` 上的代理键 `id`。自然键 `(order_id, ztime)` 就足够了。
\i tmp.sql
-- Reworked schema: tables are named orders / order_history, the history
-- timestamp column is ztime, and both timestamp columns are NOT NULL with
-- an infinity default instead of allowing NULL.
CREATE TABLE orders (
    id         uuid          NOT NULL,
    side       varchar(4)    NOT NULL,
    product    varchar(7)    NOT NULL,
    price      numeric(18,8) NOT NULL,
    close_time timestamp     NOT NULL DEFAULT 'infinity'::timestamp,
    CONSTRAINT order_pkey PRIMARY KEY (id)
);

CREATE TABLE order_history (
    id       serial        NOT NULL,
    amount   numeric(18,8) NOT NULL,
    ztime    timestamp     NOT NULL DEFAULT '-infinity'::timestamp,
    order_id uuid          NOT NULL,
    CONSTRAINT order_history_pkey PRIMARY KEY (id),
    CONSTRAINT order_history_order_id_fkey
        FOREIGN KEY (order_id) REFERENCES orders (id)
);

-- The two single-column indexes are left disabled; the composite
-- (order_id, ztime) index below is created instead.
-- CREATE INDEX order_history_order_id ON order_history USING btree (order_id);
-- CREATE INDEX order_history_time_idx ON order_history USING btree (ztime);
CREATE INDEX order_history_order_id_ztime
    ON order_history USING btree (order_id, ztime);

-- NOTE(review): despite its name, this index is on orders.close_time.
CREATE INDEX order_h_ztime
    ON orders USING btree (close_time);
-- Plan-only EXPLAIN (no ANALYZE) of the join against orders / order_history.
EXPLAIN
SELECT
    oo.id,
    oo.side,
    oo.price,
    oo.close_time,
    oh.ztime,
    oh.amount
FROM orders AS oo
INNER JOIN order_history AS oh
    ON oh.order_id = oo.id
WHERE oo.product = 'BTC-USD'
  -- No "OR oo.close_time IS NULL" branch: orders.close_time is declared
  -- NOT NULL with an 'infinity' default in the schema above.
  AND oo.close_time > '2018-03-28 07:00:00'
  AND oh.ztime <= '2018-03-28 08:00:00'
ORDER BY oh.ztime
;
在空表(没有任何数据!)上生成的执行计划:
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------
Sort (cost=16.98..16.99 rows=1 width=92)
Sort Key: oh.ztime
-> Nested Loop (cost=3.14..16.98 rows=1 width=92)
-> Bitmap Heap Scan on orders oo (cost=1.94..13.64 rows=1 width=64)
Recheck Cond: (close_time > '2018-03-28 07:00:00'::timestamp without time zone)
Filter: ((product)::text = 'BTC-USD'::text)
-> Bitmap Index Scan on order_h_ztime (cost=0.00..1.94 rows=213 width=0)
Index Cond: (close_time > '2018-03-28 07:00:00'::timestamp without time zone)
-> Bitmap Heap Scan on order_history oh (cost=1.20..3.33 rows=2 width=44)
Recheck Cond: ((order_id = oo.id) AND (ztime <= '2018-03-28 08:00:00'::timestamp without time zone))
-> Bitmap Index Scan on order_history_order_id_ztime (cost=0.00..1.20 rows=2 width=0)
Index Cond: ((order_id = oo.id) AND (ztime <= '2018-03-28 08:00:00'::timestamp without time zone))
(12 rows)