过滤视图时如何利用底层索引?
How to utilize an underlying INDEX when filtering a VIEW?
我有一个非常简单的查询,执行速度却出奇地慢,原因是它在执行 JOIN 之后才对视图的结果进行扫描和过滤。这让我有点惊讶:既然其中一个基础表在被过滤的列上建有索引,我原以为 Postgres 会在 JOIN 之前先进行过滤。
有没有办法重写这个查询,或者提示查询规划器(planner)以不同的方式执行它?
请注意,我确实知道可以通过直接访问底层表来解决这个问题,但该视图隐藏了一些复杂性,我希望在查询中不必处理这些复杂性。
查询
-- Look up the 'vitals' forms of encounter 23728 through the form view.
SELECT *
FROM form
WHERE encounter_id = 23728
  AND type = 'vitals';
EXPLAIN ANALYZE 输出
Subquery Scan on form (cost=0.57..3439.07 rows=1 width=622) (actual time=8.187..8.187 rows=0 loops=1)
Filter: ((form.encounter_id = 23728) AND (form.type = 'vitals'::text))
Rows Removed by Filter: 12000
-> Unique (cost=0.57..3259.07 rows=12000 width=626) (actual time=0.008..7.612 rows=12000 loops=1)
-> Merge Join (cost=0.57..3229.07 rows=12000 width=626) (actual time=0.007..5.485 rows=12000 loops=1)
Merge Cond: (fd.form_id = f.id)
-> Index Scan using _idx_form_details on _form_details fd (cost=0.29..2636.78 rows=12000 width=603) (actual time=0.003..1.918 rows=12000 loops=1)
-> Index Scan using pk_form on _form f (cost=0.29..412.29 rows=12000 width=27) (actual time=0.002..1.214 rows=12000 loops=1)
Planning time: 0.170 ms
Execution time: 8.212 ms
TABLE 和 VIEW 定义
-- Parent table: one row per form, attached to an encounter.
CREATE TABLE _form (
    id           INT  NOT NULL,
    encounter_id INT  NOT NULL REFERENCES _encounter (id),
    type         TEXT NOT NULL,
    -- cid is the target of the foreign key below and is selected by the form
    -- view, so it has to be declared as a column; without it the DDL fails.
    -- NOTE(review): INT and nullable are assumed to match _user_in_role.id -- confirm.
    cid          INT,
    CONSTRAINT pk_form PRIMARY KEY (id),
    FOREIGN KEY (cid) REFERENCES _user_in_role (id)
);
-- Composite index matching the (encounter_id, type) filter used by the query.
CREATE INDEX encounter_id ON _form (encounter_id, type);
-- Detail rows of a form. A form may have several detail rows; the form view
-- keeps the one with the highest id.
-- NOTE(review): the EXPLAIN output mentions an index _idx_form_details that is
-- not defined in this DDL -- confirm its definition.
CREATE TABLE _form_details (
    id       INT     NOT NULL,
    form_id  INT     NOT NULL REFERENCES _form (id),
    archived BOOLEAN NOT NULL DEFAULT FALSE,
    -- cid must exist as a column for the foreign key below to be valid.
    -- NOTE(review): INT and nullable are assumed to match _user_in_role.id -- confirm.
    cid      INT,
    CONSTRAINT pk_form_details PRIMARY KEY (id),
    FOREIGN KEY (cid) REFERENCES _user_in_role (id)
);
-- form: one row per _form row, combined with the _form_details row that has
-- the highest id for that form.
CREATE VIEW form AS
SELECT DISTINCT ON (parent.id)
       parent.id,
       parent.encounter_id,
       parent.type,
       detail.archived,
       parent.cid
FROM _form AS parent
INNER JOIN _form_details AS detail
        ON detail.form_id = parent.id
-- DISTINCT ON keeps the first row of each parent.id group, so sorting the
-- details by id DESC selects the highest-id detail row.
ORDER BY parent.id, detail.id DESC;
编辑:
有人发布过一个答案(随后被删除),其中有一条重要信息:即使基础表的 encounter_id 列建有索引,视图中的 ORDER BY 操作也会使该索引无法发挥作用。遗憾的是,我们无法去掉 ORDER BY,因为 DISTINCT ON 必须依赖它才能正常工作。
- DISTINCT ON ... ORDER BY 是性能杀手(规划器无法把这个子查询拆开)
- 外键 form_id INT REFERENCES _form (id) 上缺少索引
- 可以使用 NOT EXISTS() 反连接或 row_number() 来避免 DISTINCT 子查询
SET search_path=tmp;
/***/
\i tmp.sql
-- Test copy of the form table; the column holding the form type is named
-- ztype here. The foreign keys to tencounter and _user_in_role are left out.
CREATE TABLE tform (
    id           INT  NOT NULL,
    encounter_id INT  NOT NULL,
    ztype        TEXT NOT NULL,
    CONSTRAINT pk_form PRIMARY KEY (id)
);
-- Test copy of the details table; only the foreign key to tform is kept.
CREATE TABLE tform_details (
    id       INT     NOT NULL,
    form_id  INT     NOT NULL REFERENCES tform (id),
    archived BOOLEAN NOT NULL DEFAULT FALSE,
    CONSTRAINT pk_form_details PRIMARY KEY (id)
);
-- Composite index matching the (encounter_id, ztype) filter of the test queries.
CREATE INDEX encounter_id
    ON tform (encounter_id, ztype);
-- 10000 forms spread over 29 encounter ids (23720..23748), each starting
-- with its own placeholder ztype.
INSERT INTO tform (id, encounter_id, ztype)
SELECT n,
       23720 + n % 29,
       'ztype_' || n::text
FROM generate_series(1, 10000) AS n;
-- 23 detail rows per form (k = 0..22); the detail id is 10000 * k + form id.
-- random() > 0.3 is already a boolean, so no cast is needed.
INSERT INTO tform_details (id, form_id, archived)
SELECT 10000 * k + frm.id,
       frm.id,
       random() > 0.3
FROM tform AS frm
CROSS JOIN generate_series(0, 22) AS k;
-- Relabel a random subset of forms (about 20%) as 'vitals'.
UPDATE tform
   SET ztype = 'vitals'
 WHERE random() < 0.2;
/***/
-- IF EXISTS: on a fresh schema the index does not exist yet, and a plain
-- DROP INDEX would raise an error there.
DROP INDEX IF EXISTS xxxx ;
-- Index on the form_id foreign key; id is the second column, matching the
-- per-form ordering by id that the views use.
CREATE UNIQUE INDEX xxxx ON tform_details (form_id, id);
-- Collect planner statistics after the bulk load.
VACUUM ANALYZE tform;
VACUUM ANALYZE tform_details;
-- Show the resulting table definitions and row counts.
\d tform;
\d tform_details;
select COUNT(*) FROM tform;
select COUNT(*) FROM tform_details;
-- Baseline view: DISTINCT ON keeps, for every form, the detail row with the
-- highest id.
-- IF EXISTS lets the script run on a schema where the view was never created.
DROP VIEW IF EXISTS form ;
CREATE VIEW form AS
SELECT DISTINCT ON (f.id)
        f.id
        , f.encounter_id
        , f.ztype
        , fd.archived
        -- f.cid is left out: tform is created without a cid column
FROM tform f
JOIN tform_details fd ON f.id = fd.form_id
-- the sort order decides which row DISTINCT ON keeps per f.id
ORDER BY f.id, fd.id DESC
        ;
-- Anti-join variant: a detail row is kept only when no other detail row of
-- the same form has a higher id.
-- IF EXISTS lets the script run on a schema where the view was never created.
DROP VIEW IF EXISTS form2 ;
CREATE VIEW form2 AS
SELECT f.id
        , f.encounter_id
        , f.ztype
        , fd.archived
FROM tform f
JOIN tform_details fd
        ON f.id = fd.form_id
WHERE NOT EXISTS ( SELECT *
        FROM tform_details nx
        WHERE nx.form_id = fd.form_id
        AND nx.id > fd.id
        )
        ;
-- Window-function variant: detail rows are numbered per form by id DESC and
-- only the row numbered 1 is joined to the form.
-- IF EXISTS lets the script run on a schema where the view was never created.
DROP VIEW IF EXISTS form3 ;
CREATE VIEW form3 AS
SELECT f.id
        , f.encounter_id
        , f.ztype
        , fd.archived
FROM tform f
JOIN ( select xx.form_id, xx.archived
        , row_number() OVER (PARTITION BY xx.form_id ORDER BY xx.id DESC) AS rn
        FROM tform_details xx
        ) fd ON f.id = fd.form_id AND fd.rn = 1
        ;
-- Benchmark: run the same filter against each view variant with
-- EXPLAIN ANALYZE, first with default planner settings and then with hash
-- joins disabled.
-- The first run is labelled burn-in; presumably it is a warm-up run.
\echo burn-in
EXPLAIN ANALYZE
select * from form where encounter_id= 23728 and ztype = 'vitals' ;
\echo plain
EXPLAIN ANALYZE
select * from form where encounter_id= 23728 and ztype = 'vitals' ;
EXPLAIN ANALYZE
select * from form2 where encounter_id= 23728 and ztype = 'vitals' ;
EXPLAIN ANALYZE
select * from form3 where encounter_id= 23728 and ztype = 'vitals' ;
\echo no_hash
SET enable_hashjoin = False;
EXPLAIN ANALYZE
select * from form where encounter_id= 23728 and ztype = 'vitals' ;
EXPLAIN ANALYZE
select * from form2 where encounter_id= 23728 and ztype = 'vitals' ;
EXPLAIN ANALYZE
select * from form3 where encounter_id= 23728 and ztype = 'vitals' ;
-- Restore the planner setting so later statements in this session are not
-- planned with hash joins disabled.
RESET enable_hashjoin;
@a_horse_with_no_name 在聊天中给了我迄今为止最快的解决方案,但他一直没有把它发布为答案。因此,这里把他的解决方案记录下来以供参考:使用横向连接(LATERAL JOIN)来创建视图。
-- form: every _form row paired with its _form_details row of highest id.
-- The LATERAL subquery is evaluated per form row and returns at most one
-- detail row; forms without any detail row produce no output row.
CREATE VIEW form AS
SELECT parent.id,
       parent.encounter_id,
       parent.type,
       latest.archived,
       parent.cid
FROM _form AS parent
CROSS JOIN LATERAL (
    SELECT d.form_id,
           d.archived
    FROM _form_details AS d
    WHERE d.form_id = parent.id
    ORDER BY d.id DESC
    LIMIT 1
) AS latest;
这比其他任何解决方案都快 10 倍。如果基于与 @wildplasser 相同的表把它创建为 form4,其执行情况如下:
select * from form where encounter_id= 23728 and ztype = 'vitals' ;
Time: 181.065 ms
select * from form2 where encounter_id= 23728 and ztype = 'vitals' ;
Time: 12.395 ms
select * from form3 where encounter_id= 23728 and ztype = 'vitals' ;
Time: 122.305 ms
select * from form4 where encounter_id= 23728 and ztype = 'vitals' ;
Time: 1.305 ms
关于横向连接(LATERAL JOIN,在 Postgres 9.3 中引入)的一些好建议:
我有一个非常简单的查询,执行速度却出奇地慢,原因是它在执行 JOIN 之后才对视图的结果进行扫描和过滤。这让我有点惊讶:既然其中一个基础表在被过滤的列上建有索引,我原以为 Postgres 会在 JOIN 之前先进行过滤。
有没有办法重写这个查询,或者提示查询规划器(planner)以不同的方式执行它?
请注意,我确实知道可以通过直接访问底层表来解决这个问题,但该视图隐藏了一些复杂性,我希望在查询中不必处理这些复杂性。
查询
-- Look up the 'vitals' forms of encounter 23728 through the form view.
SELECT *
FROM form
WHERE encounter_id = 23728
  AND type = 'vitals';
EXPLAIN ANALYZE 输出
Subquery Scan on form (cost=0.57..3439.07 rows=1 width=622) (actual time=8.187..8.187 rows=0 loops=1)
Filter: ((form.encounter_id = 23728) AND (form.type = 'vitals'::text))
Rows Removed by Filter: 12000
-> Unique (cost=0.57..3259.07 rows=12000 width=626) (actual time=0.008..7.612 rows=12000 loops=1)
-> Merge Join (cost=0.57..3229.07 rows=12000 width=626) (actual time=0.007..5.485 rows=12000 loops=1)
Merge Cond: (fd.form_id = f.id)
-> Index Scan using _idx_form_details on _form_details fd (cost=0.29..2636.78 rows=12000 width=603) (actual time=0.003..1.918 rows=12000 loops=1)
-> Index Scan using pk_form on _form f (cost=0.29..412.29 rows=12000 width=27) (actual time=0.002..1.214 rows=12000 loops=1)
Planning time: 0.170 ms
Execution time: 8.212 ms
TABLE 和 VIEW 定义
-- Parent table: one row per form, attached to an encounter.
CREATE TABLE _form (
    id           INT  NOT NULL,
    encounter_id INT  NOT NULL REFERENCES _encounter (id),
    type         TEXT NOT NULL,
    -- cid is the target of the foreign key below and is selected by the form
    -- view, so it has to be declared as a column; without it the DDL fails.
    -- NOTE(review): INT and nullable are assumed to match _user_in_role.id -- confirm.
    cid          INT,
    CONSTRAINT pk_form PRIMARY KEY (id),
    FOREIGN KEY (cid) REFERENCES _user_in_role (id)
);
-- Composite index matching the (encounter_id, type) filter used by the query.
CREATE INDEX encounter_id ON _form (encounter_id, type);
-- Detail rows of a form. A form may have several detail rows; the form view
-- keeps the one with the highest id.
-- NOTE(review): the EXPLAIN output mentions an index _idx_form_details that is
-- not defined in this DDL -- confirm its definition.
CREATE TABLE _form_details (
    id       INT     NOT NULL,
    form_id  INT     NOT NULL REFERENCES _form (id),
    archived BOOLEAN NOT NULL DEFAULT FALSE,
    -- cid must exist as a column for the foreign key below to be valid.
    -- NOTE(review): INT and nullable are assumed to match _user_in_role.id -- confirm.
    cid      INT,
    CONSTRAINT pk_form_details PRIMARY KEY (id),
    FOREIGN KEY (cid) REFERENCES _user_in_role (id)
);
-- form: one row per _form row, combined with the _form_details row that has
-- the highest id for that form.
CREATE VIEW form AS
SELECT DISTINCT ON (parent.id)
       parent.id,
       parent.encounter_id,
       parent.type,
       detail.archived,
       parent.cid
FROM _form AS parent
INNER JOIN _form_details AS detail
        ON detail.form_id = parent.id
-- DISTINCT ON keeps the first row of each parent.id group, so sorting the
-- details by id DESC selects the highest-id detail row.
ORDER BY parent.id, detail.id DESC;
编辑:
有人发布过一个答案(随后被删除),其中有一条重要信息:即使基础表的 encounter_id 列建有索引,视图中的 ORDER BY 操作也会使该索引无法发挥作用。遗憾的是,我们无法去掉 ORDER BY,因为 DISTINCT ON 必须依赖它才能正常工作。
- DISTINCT ON ... ORDER BY 是性能杀手(规划器无法把这个子查询拆开)
- 外键 form_id INT REFERENCES _form (id) 上缺少索引
- 可以使用 NOT EXISTS() 反连接或 row_number() 来避免 DISTINCT 子查询
SET search_path=tmp;
/***/
\i tmp.sql
-- Test copy of the form table; the column holding the form type is named
-- ztype here. The foreign keys to tencounter and _user_in_role are left out.
CREATE TABLE tform (
    id           INT  NOT NULL,
    encounter_id INT  NOT NULL,
    ztype        TEXT NOT NULL,
    CONSTRAINT pk_form PRIMARY KEY (id)
);
-- Test copy of the details table; only the foreign key to tform is kept.
CREATE TABLE tform_details (
    id       INT     NOT NULL,
    form_id  INT     NOT NULL REFERENCES tform (id),
    archived BOOLEAN NOT NULL DEFAULT FALSE,
    CONSTRAINT pk_form_details PRIMARY KEY (id)
);
-- Composite index matching the (encounter_id, ztype) filter of the test queries.
CREATE INDEX encounter_id
    ON tform (encounter_id, ztype);
-- 10000 forms spread over 29 encounter ids (23720..23748), each starting
-- with its own placeholder ztype.
INSERT INTO tform (id, encounter_id, ztype)
SELECT n,
       23720 + n % 29,
       'ztype_' || n::text
FROM generate_series(1, 10000) AS n;
-- 23 detail rows per form (k = 0..22); the detail id is 10000 * k + form id.
-- random() > 0.3 is already a boolean, so no cast is needed.
INSERT INTO tform_details (id, form_id, archived)
SELECT 10000 * k + frm.id,
       frm.id,
       random() > 0.3
FROM tform AS frm
CROSS JOIN generate_series(0, 22) AS k;
-- Relabel a random subset of forms (about 20%) as 'vitals'.
UPDATE tform
   SET ztype = 'vitals'
 WHERE random() < 0.2;
/***/
-- IF EXISTS: on a fresh schema the index does not exist yet, and a plain
-- DROP INDEX would raise an error there.
DROP INDEX IF EXISTS xxxx ;
-- Index on the form_id foreign key; id is the second column, matching the
-- per-form ordering by id that the views use.
CREATE UNIQUE INDEX xxxx ON tform_details (form_id, id);
-- Collect planner statistics after the bulk load.
VACUUM ANALYZE tform;
VACUUM ANALYZE tform_details;
-- Show the resulting table definitions and row counts.
\d tform;
\d tform_details;
select COUNT(*) FROM tform;
select COUNT(*) FROM tform_details;
-- Baseline view: DISTINCT ON keeps, for every form, the detail row with the
-- highest id.
-- IF EXISTS lets the script run on a schema where the view was never created.
DROP VIEW IF EXISTS form ;
CREATE VIEW form AS
SELECT DISTINCT ON (f.id)
        f.id
        , f.encounter_id
        , f.ztype
        , fd.archived
        -- f.cid is left out: tform is created without a cid column
FROM tform f
JOIN tform_details fd ON f.id = fd.form_id
-- the sort order decides which row DISTINCT ON keeps per f.id
ORDER BY f.id, fd.id DESC
        ;
-- Anti-join variant: a detail row is kept only when no other detail row of
-- the same form has a higher id.
-- IF EXISTS lets the script run on a schema where the view was never created.
DROP VIEW IF EXISTS form2 ;
CREATE VIEW form2 AS
SELECT f.id
        , f.encounter_id
        , f.ztype
        , fd.archived
FROM tform f
JOIN tform_details fd
        ON f.id = fd.form_id
WHERE NOT EXISTS ( SELECT *
        FROM tform_details nx
        WHERE nx.form_id = fd.form_id
        AND nx.id > fd.id
        )
        ;
-- Window-function variant: detail rows are numbered per form by id DESC and
-- only the row numbered 1 is joined to the form.
-- IF EXISTS lets the script run on a schema where the view was never created.
DROP VIEW IF EXISTS form3 ;
CREATE VIEW form3 AS
SELECT f.id
        , f.encounter_id
        , f.ztype
        , fd.archived
FROM tform f
JOIN ( select xx.form_id, xx.archived
        , row_number() OVER (PARTITION BY xx.form_id ORDER BY xx.id DESC) AS rn
        FROM tform_details xx
        ) fd ON f.id = fd.form_id AND fd.rn = 1
        ;
-- Benchmark: run the same filter against each view variant with
-- EXPLAIN ANALYZE, first with default planner settings and then with hash
-- joins disabled.
-- The first run is labelled burn-in; presumably it is a warm-up run.
\echo burn-in
EXPLAIN ANALYZE
select * from form where encounter_id= 23728 and ztype = 'vitals' ;
\echo plain
EXPLAIN ANALYZE
select * from form where encounter_id= 23728 and ztype = 'vitals' ;
EXPLAIN ANALYZE
select * from form2 where encounter_id= 23728 and ztype = 'vitals' ;
EXPLAIN ANALYZE
select * from form3 where encounter_id= 23728 and ztype = 'vitals' ;
\echo no_hash
SET enable_hashjoin = False;
EXPLAIN ANALYZE
select * from form where encounter_id= 23728 and ztype = 'vitals' ;
EXPLAIN ANALYZE
select * from form2 where encounter_id= 23728 and ztype = 'vitals' ;
EXPLAIN ANALYZE
select * from form3 where encounter_id= 23728 and ztype = 'vitals' ;
-- Restore the planner setting so later statements in this session are not
-- planned with hash joins disabled.
RESET enable_hashjoin;
@a_horse_with_no_name 在聊天中给了我迄今为止最快的解决方案,但他一直没有把它发布为答案。因此,这里把他的解决方案记录下来以供参考:使用横向连接(LATERAL JOIN)来创建视图。
-- form: every _form row paired with its _form_details row of highest id.
-- The LATERAL subquery is evaluated per form row and returns at most one
-- detail row; forms without any detail row produce no output row.
CREATE VIEW form AS
SELECT parent.id,
       parent.encounter_id,
       parent.type,
       latest.archived,
       parent.cid
FROM _form AS parent
CROSS JOIN LATERAL (
    SELECT d.form_id,
           d.archived
    FROM _form_details AS d
    WHERE d.form_id = parent.id
    ORDER BY d.id DESC
    LIMIT 1
) AS latest;
这比其他任何解决方案都快 10 倍。如果基于与 @wildplasser 相同的表把它创建为 form4,其执行情况如下:
select * from form where encounter_id= 23728 and ztype = 'vitals' ;
Time: 181.065 ms
select * from form2 where encounter_id= 23728 and ztype = 'vitals' ;
Time: 12.395 ms
select * from form3 where encounter_id= 23728 and ztype = 'vitals' ;
Time: 122.305 ms
select * from form4 where encounter_id= 23728 and ztype = 'vitals' ;
Time: 1.305 ms
关于横向连接(LATERAL JOIN,在 Postgres 9.3 中引入)的一些好建议: