如何使用子查询优化 sql 查询,也许是通过横向连接?
How to optimize sql query with subqueries, perhaps by lateral join?
我正在尝试优化一个复杂的 SQL 查询,每次地图边界框(bounding box)发生变化时都会执行它。我原以为使用 INNER JOIN LATERAL 会是最快的,但事实并非如此。有谁知道如何加快这个查询,以及如何更好地利用 LATERAL JOIN?
我最快的查询:
-- Routes that are the parent (in hiking.hierarchy) of a child value found in
-- the "rels" array of at least one segment intersecting the search box.
-- One output row is produced per matching hierarchy row.
SELECT
    r0."id",
    r0."name"
FROM "hiking"."routes" AS r0
INNER JOIN "hiking"."hierarchy" AS h1
    ON h1."parent" = r0."id"
INNER JOIN (
    -- Distinct "rels" elements of all segments whose geometry intersects the
    -- box spanned by the two points below (SRID set to 3857).
    SELECT DISTINCT unnest(seg."rels") AS "rel"
    FROM "hiking"."segments" AS seg
    WHERE ST_Intersects(
        seg."geom",
        ST_SetSrid(
            ST_MakeBox2D(
                ST_GeomFromText('POINT(1285982.015631 7217169.814674)', -1),
                ST_GeomFromText('POINT(2371999.313507 6454022.524275)', -1)
            ),
            3857
        )
    )
) AS s2
    ON s2."rel" = h1."child";
Planning time: ~0.605 ms Execution time: ~37.232 ms
与上面的查询基本相同,只是改用了 LATERAL JOIN,结果反而更慢?
-- Same result set as the plain derived-table join, but the derived table is
-- declared LATERAL.
-- NOTE: the subquery references no columns of r0 or h1, so LATERAL adds no
-- correlation here; the match on "child" is still done by the join condition.
SELECT
    r0."id",
    r0."name"
FROM "hiking"."routes" AS r0
INNER JOIN "hiking"."hierarchy" AS h1
    ON h1."parent" = r0."id"
INNER JOIN LATERAL (
    -- Distinct "rels" elements of all segments whose geometry intersects the
    -- box spanned by the two points below (SRID set to 3857).
    SELECT DISTINCT unnest(seg."rels") AS "rel"
    FROM "hiking"."segments" AS seg
    WHERE ST_Intersects(
        seg."geom",
        ST_SetSrid(
            ST_MakeBox2D(
                ST_GeomFromText('POINT(1285982.015631 7217169.814674)', -1),
                ST_GeomFromText('POINT(2371999.313507 6454022.524275)', -1)
            ),
            3857
        )
    )
) AS s2
    ON s2."rel" = h1."child";
Planning time: ~1.353 ms Execution time: ~38.518 ms
子查询中包含子查询的最慢查询(这是我的第一个,所以我对其进行了一些改进):
-- Routes flagged "top" whose id is the parent of at least one hierarchy row
-- whose child appears in the "rels" array of a segment intersecting the
-- search box. DISTINCT on "parent" yields at most one row per route id.
SELECT
    r0."id",
    r0."name"
FROM "hiking"."routes" AS r0
INNER JOIN (
    -- Distinct parents of the children found inside the search box.
    SELECT DISTINCT h0."parent" AS "parent"
    FROM "hiking"."hierarchy" AS h0
    INNER JOIN (
        -- Distinct "rels" elements of all segments whose geometry intersects
        -- the box spanned by the two points below (SRID set to 3857).
        SELECT DISTINCT unnest(seg."rels") AS "rel"
        FROM "hiking"."segments" AS seg
        WHERE ST_Intersects(
            seg."geom",
            ST_SetSrid(
                ST_MakeBox2D(
                    ST_GeomFromText('POINT(1285982.015631 7217169.814674)', -1),
                    ST_GeomFromText('POINT(2371999.313507 6454022.524275)', -1)
                ),
                3857
            )
        )
    ) AS box_rels
        ON box_rels."rel" = h0."child"
) AS s1
    ON s1."parent" = r0."id"
WHERE r0."top";
Planning time: ~1.017 ms Execution time: ~41.288 ms
如果不了解您的数据库,很难重现您的查询逻辑,但我会尝试,所以请耐心等待:
-- Routes joined to their hierarchy rows, keeping only hierarchy rows whose
-- child is contained in the "rels" array of at least one segment that
-- intersects the search box.
SELECT
    r0."id",
    r0."name"
FROM "hiking"."routes" AS r0
INNER JOIN "hiking"."hierarchy" AS h1
    ON h1."parent" = r0."id"
WHERE EXISTS (
    SELECT 1
    FROM "hiking"."segments" AS seg
    WHERE ST_Intersects(
        seg."geom",
        ST_SetSrid(
            ST_MakeBox2D(
                ST_GeomFromText('POINT(1285982.015631 7217169.814674)', -1),
                ST_GeomFromText('POINT(2371999.313507 6454022.524275)', -1)
            ),
            3857
        )
    )
    -- Array containment instead of unnest(): true when the single-element
    -- array holding the child is contained in the segment's "rels".
    AND ARRAY[h1."child"] <@ seg."rels"
);
有两点:
- 使用 EXISTS 或 NOT EXISTS 过滤数据,有时比使用连接(JOIN)更快。
- 您可以直接使用数组比较运算符,而不必先展开(unnest)数组字段、再将其元素逐一与某个值比较。如果建有合适的 GIN 索引,速度会更快(参见 PostgreSQL 文档中关于数组运算符和 GIN 索引的章节)。
这里是一个简单的例子,说明如何在数组上使用索引以及它如何更快:
-- Demo: a GIN index on an integer-array column and its effect on a
-- "does the array contain this value" lookup.
-- Comments use "--": "//" is not SQL comment syntax and makes the script fail.
CREATE TABLE foo (bar int[]);

-- One million rows; each array is {1, 2, 3, x} for x in 1..1000000.
INSERT INTO foo (bar)
SELECT ARRAY[1, 2, 3, x]
FROM generate_series(1, 1000000) AS x;

-- Note this: the GIN index the containment predicate below can use.
CREATE INDEX idx ON foo USING gin (bar);

-- Unnests each row's array before comparing: 6936,345 ms on my HW
SELECT bar FROM foo WHERE 666 IN (SELECT unnest(bar));

-- Array containment operator: 45,524 ms
SELECT bar FROM foo WHERE ARRAY[666] <@ bar;
我正在尝试优化一个复杂的 SQL 查询,每次地图边界框(bounding box)发生变化时都会执行它。我原以为使用 INNER JOIN LATERAL 会是最快的,但事实并非如此。有谁知道如何加快这个查询,以及如何更好地利用 LATERAL JOIN?
我最快的查询:
-- Routes that are the parent (in hiking.hierarchy) of a child value found in
-- the "rels" array of at least one segment intersecting the search box.
-- One output row is produced per matching hierarchy row.
SELECT
    r0."id",
    r0."name"
FROM "hiking"."routes" AS r0
INNER JOIN "hiking"."hierarchy" AS h1
    ON h1."parent" = r0."id"
INNER JOIN (
    -- Distinct "rels" elements of all segments whose geometry intersects the
    -- box spanned by the two points below (SRID set to 3857).
    SELECT DISTINCT unnest(seg."rels") AS "rel"
    FROM "hiking"."segments" AS seg
    WHERE ST_Intersects(
        seg."geom",
        ST_SetSrid(
            ST_MakeBox2D(
                ST_GeomFromText('POINT(1285982.015631 7217169.814674)', -1),
                ST_GeomFromText('POINT(2371999.313507 6454022.524275)', -1)
            ),
            3857
        )
    )
) AS s2
    ON s2."rel" = h1."child";
Planning time: ~0.605 ms Execution time: ~37.232 ms
与上面的查询基本相同,只是改用了 LATERAL JOIN,结果反而更慢?
-- Same result set as the plain derived-table join, but the derived table is
-- declared LATERAL.
-- NOTE: the subquery references no columns of r0 or h1, so LATERAL adds no
-- correlation here; the match on "child" is still done by the join condition.
SELECT
    r0."id",
    r0."name"
FROM "hiking"."routes" AS r0
INNER JOIN "hiking"."hierarchy" AS h1
    ON h1."parent" = r0."id"
INNER JOIN LATERAL (
    -- Distinct "rels" elements of all segments whose geometry intersects the
    -- box spanned by the two points below (SRID set to 3857).
    SELECT DISTINCT unnest(seg."rels") AS "rel"
    FROM "hiking"."segments" AS seg
    WHERE ST_Intersects(
        seg."geom",
        ST_SetSrid(
            ST_MakeBox2D(
                ST_GeomFromText('POINT(1285982.015631 7217169.814674)', -1),
                ST_GeomFromText('POINT(2371999.313507 6454022.524275)', -1)
            ),
            3857
        )
    )
) AS s2
    ON s2."rel" = h1."child";
Planning time: ~1.353 ms Execution time: ~38.518 ms
子查询中包含子查询的最慢查询(这是我的第一个,所以我对其进行了一些改进):
-- Routes flagged "top" whose id is the parent of at least one hierarchy row
-- whose child appears in the "rels" array of a segment intersecting the
-- search box. DISTINCT on "parent" yields at most one row per route id.
SELECT
    r0."id",
    r0."name"
FROM "hiking"."routes" AS r0
INNER JOIN (
    -- Distinct parents of the children found inside the search box.
    SELECT DISTINCT h0."parent" AS "parent"
    FROM "hiking"."hierarchy" AS h0
    INNER JOIN (
        -- Distinct "rels" elements of all segments whose geometry intersects
        -- the box spanned by the two points below (SRID set to 3857).
        SELECT DISTINCT unnest(seg."rels") AS "rel"
        FROM "hiking"."segments" AS seg
        WHERE ST_Intersects(
            seg."geom",
            ST_SetSrid(
                ST_MakeBox2D(
                    ST_GeomFromText('POINT(1285982.015631 7217169.814674)', -1),
                    ST_GeomFromText('POINT(2371999.313507 6454022.524275)', -1)
                ),
                3857
            )
        )
    ) AS box_rels
        ON box_rels."rel" = h0."child"
) AS s1
    ON s1."parent" = r0."id"
WHERE r0."top";
Planning time: ~1.017 ms Execution time: ~41.288 ms
如果不了解您的数据库,很难重现您的查询逻辑,但我会尝试,所以请耐心等待:
-- Routes joined to their hierarchy rows, keeping only hierarchy rows whose
-- child is contained in the "rels" array of at least one segment that
-- intersects the search box.
SELECT
    r0."id",
    r0."name"
FROM "hiking"."routes" AS r0
INNER JOIN "hiking"."hierarchy" AS h1
    ON h1."parent" = r0."id"
WHERE EXISTS (
    SELECT 1
    FROM "hiking"."segments" AS seg
    WHERE ST_Intersects(
        seg."geom",
        ST_SetSrid(
            ST_MakeBox2D(
                ST_GeomFromText('POINT(1285982.015631 7217169.814674)', -1),
                ST_GeomFromText('POINT(2371999.313507 6454022.524275)', -1)
            ),
            3857
        )
    )
    -- Array containment instead of unnest(): true when the single-element
    -- array holding the child is contained in the segment's "rels".
    AND ARRAY[h1."child"] <@ seg."rels"
);
有两点:
- 使用 EXISTS 或 NOT EXISTS 过滤数据,有时比使用连接(JOIN)更快。
- 您可以直接使用数组比较运算符,而不必先展开(unnest)数组字段、再将其元素逐一与某个值比较。如果建有合适的 GIN 索引,速度会更快(参见 PostgreSQL 文档中关于数组运算符和 GIN 索引的章节)。
这里是一个简单的例子,说明如何在数组上使用索引以及它如何更快:
-- Demo: a GIN index on an integer-array column and its effect on a
-- "does the array contain this value" lookup.
-- Comments use "--": "//" is not SQL comment syntax and makes the script fail.
CREATE TABLE foo (bar int[]);

-- One million rows; each array is {1, 2, 3, x} for x in 1..1000000.
INSERT INTO foo (bar)
SELECT ARRAY[1, 2, 3, x]
FROM generate_series(1, 1000000) AS x;

-- Note this: the GIN index the containment predicate below can use.
CREATE INDEX idx ON foo USING gin (bar);

-- Unnests each row's array before comparing: 6936,345 ms on my HW
SELECT bar FROM foo WHERE 666 IN (SELECT unnest(bar));

-- Array containment operator: 45,524 ms
SELECT bar FROM foo WHERE ARRAY[666] <@ bar;