查询规划器不使用过滤器来限制高成本连接中的计算
Query planner not using filter to limit calculations in high-cost join
我正在尝试解决我在 PostgreSQL 中遇到的查询性能问题。
正在建模的一般概念:软件许可证的购买与分配。我想我已经去掉了足够多的其他建模内容,现在它与标准的酒店客房预订系统非常相似,只不过当前的分配(相当于酒店预订)没有已知的结束日期属于正常情况。
查询目的: 这是一个视图,它汇集了显示有关许可证及其来源的信息所需的信息。当应用程序查询视图时,它提供了一个tLicence.id
,以便返回一行。
查询中剩余的非酒店类概念:
- 一些许可协议限制了软件重新分配的速度;这已被硬编码到查询中作为 1 天。
- 从理论上讲,许可证可以同时分配过去和现在;这 不应该 发生,并且应用程序不鼓励这样做,但是如果人类在现实世界中犯了错误,应用程序确实允许将该错误输入系统。这明显不同于一般的酒店系统,如果客人走错房间,当前入住的人会反对。
别名purchase_quantities_assignnments
的嵌套SELECT
是数据库中的视图(为方便起见,此处内联)。理想情况下,我希望对性能问题的任何修复都不需要将此视图的修改版本内联到查询中;理想情况下,视图可以继续按原样存在,并在其他查询中以其他方式使用。
问题
如果我使用 WHERE tLicences.id = 19
查询此视图(查询),则需要很长时间才能生成结果。特别是,它似乎正在为 periodsOfAvailability_start
生成整个集合(这很慢),然后加入;这个结论是基于 EXPLAIN ANALYZE GroupAggregate
返回 10 行(这是购买的数量)。我感觉查询规划器应该能够推断出,可以利用 tAssignments.purchase_id
来显著减少需要生成的 periodsOfAvailability_start
行数。
但是,如果我使用 WHERE tLicences.id = 19 AND tLicences.purchase_id = ?
[?
作为该许可证的购买 ID] 查询此视图(查询),则查询会按预期运行,只为具有该购买 ID 的记录生成 periodsOfAvailability_start
集合;这个结论是基于 EXPLAIN ANALYZE GroupAggregate
返回 1 行(这是许可证所属的购买数量)。
查询
-- Fetches licence 19 together with its purchase and that purchase's assignment
-- counters. The derived table "purchase_quantities_assignnments" is an inlined
-- copy of a view; the nesting is left untouched because this query exists to
-- show how the planner treats it.
SELECT *
FROM test.tPurchases AS tPurchases
    INNER JOIN test.tLicences
        ON tLicences.purchase_id = tPurchases.id
    LEFT JOIN (
        -- One row per purchase with three counters over its availability periods.
        SELECT
            purchase_id,
            SUM(
                CASE WHEN assignment_newer_id IS NULL THEN 0 ELSE 1 END
            ) AS prchs_quantity_assigned,
            -- Simple CASE on a boolean: an unknown (NULL) condition matches neither
            -- WHEN arm, so the row contributes NULL to the SUM.
            SUM(
                CASE (
                    assignment_newer_id IS NULL
                    AND CURRENT_TIMESTAMP >= licence_availability_start
                    AND CURRENT_TIMESTAMP <= licence_availability_end
                )
                    WHEN TRUE THEN 1
                    WHEN FALSE THEN 0
                END
            ) AS prchs_quantity_notAssignedAndCanBeAssigned,
            SUM(
                CASE (
                    assignment_newer_id IS NULL
                    AND licence_availability_start > CURRENT_TIMESTAMP
                )
                    WHEN TRUE THEN 1
                    WHEN FALSE THEN 0
                END
            ) AS prchs_quantity_notAssignedAndCannotBeAssigned
        FROM (
            -- Branch 1: periods following an "older" assignment. The start is the
            -- latest of the purchase date, older.end_ and older.start + 1 day (the
            -- hard-coded reassignment delay); the end is the day before a "newer"
            -- assignment starts, or 'infinity' when no such row was joined.
            SELECT
                tPurchases.id AS purchase_id,
                tPurchases.date_ AS purchase_date,
                tLicences.id AS licence_id,
                GREATEST(
                    tPurchases.date_,
                    older.end_,
                    older.start + INTERVAL '1 day'
                ) AS licence_availability_start,
                CASE
                    WHEN newer.id IS NOT NULL THEN newer.start - INTERVAL '1 day'
                    ELSE 'infinity'
                END AS licence_availability_end,
                COALESCE(newer.start, 'infinity') AS licence_availability_uninstallBy,
                older.id AS assignment_older_id,
                older.start AS assignment_older_start,
                older.end_ AS assignment_older_end,
                newer.id AS assignment_newer_id,
                newer.start AS assignment_newer_start,
                newer.end_ AS assignment_newer_end
            FROM test.tLicences
                INNER JOIN test.tPurchases
                    ON tPurchases.id = tLicences.purchase_id
                LEFT JOIN test.tAssignments AS older
                    ON (
                        older.licence_id = tLicences.id
                        AND NOT older.deleted
                    )
                -- "newer" is any other non-deleted assignment of the same licence.
                LEFT JOIN test.tAssignments AS newer
                    ON (
                        newer.licence_id = older.licence_id
                        AND newer.id <> older.id
                        AND NOT newer.deleted
                    )
            WHERE NOT tLicences.deleted

            UNION

            -- Branch 2: the period from the purchase date up to the day before an
            -- assignment starts. Despite the alias "oldest", the join matches every
            -- non-deleted assignment of the licence.
            SELECT
                tPurchases.id AS purchase_id,
                tPurchases.date_ AS purchase_date,
                tLicences.id AS licence_id,
                tPurchases.date_ AS licence_availability_start,
                oldest.start - INTERVAL '1 day' AS licence_availability_end,
                oldest.start AS licence_availability_uninstallBy,
                NULL AS assignment_older_id,
                NULL AS assignment_older_start,
                NULL AS assignment_older_end,
                oldest.id AS assignment_newer_id,
                oldest.start AS assignment_newer_start,
                oldest.end_ AS assignment_newer_end
            FROM test.tLicences
                INNER JOIN test.tPurchases
                    ON tPurchases.id = tLicences.purchase_id
                INNER JOIN test.tAssignments AS oldest
                    ON oldest.licence_id = tLicences.id
            WHERE NOT oldest.deleted
                AND NOT tLicences.deleted
        ) AS periodsOfAvailability_start
        WHERE (assignment_newer_end IS NULL OR assignment_newer_id IS NULL)
        GROUP BY purchase_id
    ) AS purchase_quantities_assignnments
        ON purchase_quantities_assignnments.purchase_id = tPurchases.id
WHERE
    tLicences.id = 19 /* [Unexpected behaviour] The full set for "purchase_quantities_assignnments" is generated */
    -- Alternative filters that were tried instead of the line above:
    --tLicences.id = 19 AND tLicences.purchase_id = ? /* [Expected behaviour] Only the single relevant row for "purchase_quantities_assignnments" appears to be generated */
    --tLicences.id = 19 AND tPurchases.id = ? /* [Expected behaviour] Only the single relevant row for "purchase_quantities_assignnments" appears to be generated */
    --tLicences.purchase_id = ? /* [Expected behaviour] Only the single relevant row for "purchase_quantities_assignnments" appears to be generated. Note: This is a different query *result* than the others */
问题:有什么方法可以解决这个问题,而无需提供 tLicences.purchase_id
?
数据库版本:PostgreSQL9.0
SQL 生成模式、表并填充这些表:
这段脚本运行时间有点长,因为我想要与我们实际数据规模相近的数据量。如果运行时间成问题,可以减少许可证数量 (30000) 和分配数量 (100000)。
-- Schema that holds every object created by this reproduction script.
CREATE SCHEMA test;

-- Purchases: one row per purchase, with its date and a soft-delete flag.
CREATE TABLE test.tPurchases (
    id      SERIAL  PRIMARY KEY,
    date_   DATE    NOT NULL,
    /* … */
    deleted BOOLEAN NOT NULL DEFAULT FALSE
);
-- Licences: each row belongs to exactly one purchase (purchase_id) and carries
-- a soft-delete flag.
CREATE TABLE test.tLicences (
    id          SERIAL  PRIMARY KEY,
    purchase_id INTEGER NOT NULL
        REFERENCES test.tPurchases (id)
            ON UPDATE RESTRICT
            ON DELETE RESTRICT,
    /* … */
    deleted     BOOLEAN NOT NULL DEFAULT FALSE
);

-- Supports lookups of licences by purchase.
CREATE INDEX ON test.tLicences (purchase_id);
-- Assignments of a licence over a date range. end_ is nullable, so an
-- assignment may have no end date.
CREATE TABLE test.tAssignments (
    id         SERIAL  PRIMARY KEY,
    licence_id INTEGER NOT NULL
        REFERENCES test.tLicences (id)
            ON UPDATE RESTRICT
            ON DELETE RESTRICT,
    start      DATE    NOT NULL,
    end_       DATE,
    /* … */
    deleted    BOOLEAN NOT NULL DEFAULT FALSE,
    -- An assignment cannot end before it starts; a NULL end_ passes the check.
    CHECK (start <= end_)
);

-- Supports lookups of assignments by licence.
CREATE INDEX ON test.tAssignments (licence_id);
-- Seed 10 purchases (ids 1..10), each dated at a random offset of up to one
-- year after 2000-01-01.
INSERT INTO test.tPurchases (id, date_)
SELECT
    purchase_no,
    TIMESTAMP '2000-01-01' + random() * INTERVAL '1 year' AS date_
FROM generate_series(1, 10) AS purchase_no;
-- Seed 30000 licences, each attached to a random purchase in 1..10; roughly
-- 1% of them are flagged as deleted.
INSERT INTO test.tLicences (id, purchase_id, deleted)
SELECT
    licence_no,
    trunc(random() * 10 + 1) AS purchase_id,
    (random() > 0.99) AS deleted
FROM generate_series(1, 30000) AS licence_no;
-- Seed 100000 assignments spread randomly over the 30000 licences.
-- Within a licence the assignments are ranked by id; the n-th one starts
-- 20*n .. 20*n+10 days after the purchase date and ends 20*n+10 .. 20*n+20
-- days after it. The highest-ranked assignment of a licence gets a NULL end_
-- about half of the time, and roughly 5% of all rows are flagged as deleted.
INSERT INTO test.tAssignments (id, licence_id, start, end_, deleted)
SELECT
    ranked.id,
    ranked.licence_id,
    tPurchases.date_ + ((ranked.seq_no * 20 + random() * 10) || ' days')::interval AS start,
    CASE
        WHEN (
            ranked.seq_no = max(ranked.seq_no) OVER (PARTITION BY ranked.licence_id)
            AND random() > 0.5
        ) THEN NULL
        ELSE tPurchases.date_ + ((ranked.seq_no * 20 + 10 + random() * 10) || ' days')::interval
    END AS end_,
    (random() > 0.95) AS deleted
FROM (
    -- Number each licence's assignments in id order.
    SELECT
        drawn.id,
        drawn.licence_id,
        rank() OVER (PARTITION BY drawn.licence_id ORDER BY drawn.id) AS seq_no
    FROM (
        -- Draw a random licence (1..30000) for every assignment id.
        SELECT
            assignment_no AS id,
            trunc(random() * 30000 + 1) AS licence_id
        FROM generate_series(1, 100000) AS assignment_no
    ) AS drawn
) AS ranked
    INNER JOIN test.tLicences
        ON tLicences.id = ranked.licence_id
    INNER JOIN test.tPurchases
        ON tPurchases.id = tLicences.purchase_id;
您可能需要先更新统计信息,但一般来说可以使用 CTE 来强制执行您想要的优化。这里我还把您所有的子查询都改写成了 CTE,只是为了看起来更清楚:
-- Licence 19 with its purchase and that purchase's assignment counters,
-- restructured as CTEs so that only the one relevant purchase is expanded.
--
-- Fixes over the previous revision of this query:
--   * myPurchases filtered on tLicences.id without having tLicences in its FROM
--     clause (invalid reference); the purchase is now looked up via a subquery.
--   * one CASE mixed an integer THEN with a boolean ELSE; it now uses ELSE 0.
--   * the final SELECT returned every licence of the purchase; it is now
--     limited to licence 19.
-- NOTE(review): the licence id 19 appears twice (myPurchases and the final
-- WHERE); change both together.
-- NOTE(review): the benefit relies on the planner evaluating myPurchases first;
-- confirm with EXPLAIN ANALYZE on the PostgreSQL version in use.
WITH myPurchases AS
(
    -- The single purchase that owns licence 19 (tLicences.id is the primary
    -- key, so the scalar subquery yields at most one row).
    SELECT tPurchases.*
    FROM test.tPurchases AS tPurchases
    WHERE tPurchases.id = (
        SELECT tLicences.purchase_id
        FROM test.tLicences
        WHERE tLicences.id = 19
    )
), periodsOfAvailability_start AS
(
    -- Branch 1: periods following an "older" assignment; they end the day
    -- before a "newer" assignment starts, or at 'infinity' when none is joined.
    SELECT
        tPurchases.id AS purchase_id,
        tPurchases.date_ AS purchase_date,
        tLicences.id AS licence_id,
        GREATEST(tPurchases.date_, older.end_, older.start + '1 day'::interval) AS licence_availability_start,
        CASE WHEN newer.id IS NULL THEN 'infinity' ELSE newer.start - '1 day'::interval END AS licence_availability_end,
        COALESCE(newer.start, 'infinity') AS licence_availability_uninstallBy,
        older.id AS assignment_older_id,
        older.start AS assignment_older_start,
        older.end_ AS assignment_older_end,
        newer.id AS assignment_newer_id,
        newer.start AS assignment_newer_start,
        newer.end_ AS assignment_newer_end
    FROM test.tLicences
        INNER JOIN myPurchases AS tPurchases ON tPurchases.id = tLicences.purchase_id
        LEFT JOIN test.tAssignments AS older ON (NOT older.deleted AND older.licence_id = tLicences.id)
        LEFT JOIN test.tAssignments AS newer ON (NOT newer.deleted AND newer.id <> older.id AND newer.licence_id = older.licence_id)
    WHERE NOT tLicences.deleted

    UNION

    -- Branch 2: the period from the purchase date up to the day before an
    -- assignment starts (the join matches every non-deleted assignment).
    SELECT
        tPurchases.id AS purchase_id,
        tPurchases.date_ AS purchase_date,
        tLicences.id AS licence_id,
        tPurchases.date_ AS licence_availability_start,
        oldest.start - '1 day'::interval AS licence_availability_end,
        oldest.start AS licence_availability_uninstallBy,
        null AS assignment_older_id,
        null AS assignment_older_start,
        null AS assignment_older_end,
        oldest.id AS assignment_newer_id,
        oldest.start AS assignment_newer_start,
        oldest.end_ AS assignment_newer_end
    FROM test.tLicences
        INNER JOIN myPurchases AS tPurchases ON tPurchases.id = tLicences.purchase_id
        INNER JOIN test.tAssignments AS oldest ON oldest.licence_id = tLicences.id
    WHERE NOT tLicences.deleted AND NOT oldest.deleted
), purchase_quantities_assignnments AS
(
    -- One row per purchase with the three counters.
    SELECT
        purchase_id,
        SUM(CASE WHEN assignment_newer_id IS NOT null THEN 1 ELSE 0 END) AS prchs_quantity_assigned,
        SUM(CASE WHEN assignment_newer_id IS null AND current_timestamp BETWEEN licence_availability_start AND licence_availability_end THEN 1 ELSE 0 END) AS prchs_quantity_notAssignedAndCanBeAssigned,
        SUM(CASE WHEN assignment_newer_id IS null AND current_timestamp < licence_availability_start THEN 1 ELSE 0 END) AS prchs_quantity_notAssignedAndCannotBeAssigned
    FROM periodsOfAvailability_start
    WHERE assignment_newer_id IS null OR assignment_newer_end IS null
    GROUP BY purchase_id
)
SELECT *
FROM myPurchases AS tPurchases
    INNER JOIN test.tLicences ON tLicences.purchase_id = tPurchases.id
    LEFT JOIN purchase_quantities_assignnments ON purchase_quantities_assignnments.purchase_id = tPurchases.id
-- Without this filter every licence of the purchase would be returned.
WHERE tLicences.id = 19;
我正在尝试解决我在 PostgreSQL 中遇到的查询性能问题。
正在建模的一般概念:软件许可证的购买与分配。我想我已经去掉了足够多的其他建模内容,现在它与标准的酒店客房预订系统非常相似,只不过当前的分配(相当于酒店预订)没有已知的结束日期属于正常情况。
查询目的: 这是一个视图,它汇集了显示有关许可证及其来源的信息所需的信息。当应用程序查询视图时,它提供了一个tLicence.id
,以便返回一行。
查询中剩余的非酒店类概念:
- 一些许可协议限制了软件重新分配的速度;这已被硬编码到查询中作为 1 天。
- 从理论上讲,许可证可以同时分配过去和现在;这 不应该 发生,并且应用程序不鼓励这样做,但是如果人类在现实世界中犯了错误,应用程序确实允许将该错误输入系统。这明显不同于一般的酒店系统,如果客人走错房间,当前入住的人会反对。
别名purchase_quantities_assignnments
的嵌套SELECT
是数据库中的视图(为方便起见,此处内联)。理想情况下,我希望对性能问题的任何修复都不需要将此视图的修改版本内联到查询中;理想情况下,视图可以继续按原样存在,并在其他查询中以其他方式使用。
问题
如果我使用 WHERE tLicences.id = 19
查询此视图(查询),则需要很长时间才能生成结果。特别是,它似乎正在为 periodsOfAvailability_start
生成整个集合(这很慢),然后加入;这个结论是基于 EXPLAIN ANALYZE GroupAggregate
返回 10 行(这是购买的数量)。我感觉查询规划器应该能够推断出,可以利用 tAssignments.purchase_id
来显著减少需要生成的 periodsOfAvailability_start
行数。
但是,如果我使用 WHERE tLicences.id = 19 AND tLicences.purchase_id = ?
[?
作为该许可证的购买 ID] 查询此视图(查询),则查询会按预期运行,只为具有该购买 ID 的记录生成 periodsOfAvailability_start
集合;这个结论是基于 EXPLAIN ANALYZE GroupAggregate
返回 1 行(这是许可证所属的购买数量)。
查询
-- Fetches licence 19 together with its purchase and that purchase's assignment
-- counters. The derived table "purchase_quantities_assignnments" is an inlined
-- copy of a view; the nesting is left untouched because this query exists to
-- show how the planner treats it.
SELECT *
FROM test.tPurchases AS tPurchases
    INNER JOIN test.tLicences
        ON tLicences.purchase_id = tPurchases.id
    LEFT JOIN (
        -- One row per purchase with three counters over its availability periods.
        SELECT
            purchase_id,
            SUM(
                CASE WHEN assignment_newer_id IS NULL THEN 0 ELSE 1 END
            ) AS prchs_quantity_assigned,
            -- Simple CASE on a boolean: an unknown (NULL) condition matches neither
            -- WHEN arm, so the row contributes NULL to the SUM.
            SUM(
                CASE (
                    assignment_newer_id IS NULL
                    AND CURRENT_TIMESTAMP >= licence_availability_start
                    AND CURRENT_TIMESTAMP <= licence_availability_end
                )
                    WHEN TRUE THEN 1
                    WHEN FALSE THEN 0
                END
            ) AS prchs_quantity_notAssignedAndCanBeAssigned,
            SUM(
                CASE (
                    assignment_newer_id IS NULL
                    AND licence_availability_start > CURRENT_TIMESTAMP
                )
                    WHEN TRUE THEN 1
                    WHEN FALSE THEN 0
                END
            ) AS prchs_quantity_notAssignedAndCannotBeAssigned
        FROM (
            -- Branch 1: periods following an "older" assignment. The start is the
            -- latest of the purchase date, older.end_ and older.start + 1 day (the
            -- hard-coded reassignment delay); the end is the day before a "newer"
            -- assignment starts, or 'infinity' when no such row was joined.
            SELECT
                tPurchases.id AS purchase_id,
                tPurchases.date_ AS purchase_date,
                tLicences.id AS licence_id,
                GREATEST(
                    tPurchases.date_,
                    older.end_,
                    older.start + INTERVAL '1 day'
                ) AS licence_availability_start,
                CASE
                    WHEN newer.id IS NOT NULL THEN newer.start - INTERVAL '1 day'
                    ELSE 'infinity'
                END AS licence_availability_end,
                COALESCE(newer.start, 'infinity') AS licence_availability_uninstallBy,
                older.id AS assignment_older_id,
                older.start AS assignment_older_start,
                older.end_ AS assignment_older_end,
                newer.id AS assignment_newer_id,
                newer.start AS assignment_newer_start,
                newer.end_ AS assignment_newer_end
            FROM test.tLicences
                INNER JOIN test.tPurchases
                    ON tPurchases.id = tLicences.purchase_id
                LEFT JOIN test.tAssignments AS older
                    ON (
                        older.licence_id = tLicences.id
                        AND NOT older.deleted
                    )
                -- "newer" is any other non-deleted assignment of the same licence.
                LEFT JOIN test.tAssignments AS newer
                    ON (
                        newer.licence_id = older.licence_id
                        AND newer.id <> older.id
                        AND NOT newer.deleted
                    )
            WHERE NOT tLicences.deleted

            UNION

            -- Branch 2: the period from the purchase date up to the day before an
            -- assignment starts. Despite the alias "oldest", the join matches every
            -- non-deleted assignment of the licence.
            SELECT
                tPurchases.id AS purchase_id,
                tPurchases.date_ AS purchase_date,
                tLicences.id AS licence_id,
                tPurchases.date_ AS licence_availability_start,
                oldest.start - INTERVAL '1 day' AS licence_availability_end,
                oldest.start AS licence_availability_uninstallBy,
                NULL AS assignment_older_id,
                NULL AS assignment_older_start,
                NULL AS assignment_older_end,
                oldest.id AS assignment_newer_id,
                oldest.start AS assignment_newer_start,
                oldest.end_ AS assignment_newer_end
            FROM test.tLicences
                INNER JOIN test.tPurchases
                    ON tPurchases.id = tLicences.purchase_id
                INNER JOIN test.tAssignments AS oldest
                    ON oldest.licence_id = tLicences.id
            WHERE NOT oldest.deleted
                AND NOT tLicences.deleted
        ) AS periodsOfAvailability_start
        WHERE (assignment_newer_end IS NULL OR assignment_newer_id IS NULL)
        GROUP BY purchase_id
    ) AS purchase_quantities_assignnments
        ON purchase_quantities_assignnments.purchase_id = tPurchases.id
WHERE
    tLicences.id = 19 /* [Unexpected behaviour] The full set for "purchase_quantities_assignnments" is generated */
    -- Alternative filters that were tried instead of the line above:
    --tLicences.id = 19 AND tLicences.purchase_id = ? /* [Expected behaviour] Only the single relevant row for "purchase_quantities_assignnments" appears to be generated */
    --tLicences.id = 19 AND tPurchases.id = ? /* [Expected behaviour] Only the single relevant row for "purchase_quantities_assignnments" appears to be generated */
    --tLicences.purchase_id = ? /* [Expected behaviour] Only the single relevant row for "purchase_quantities_assignnments" appears to be generated. Note: This is a different query *result* than the others */
问题:有什么方法可以解决这个问题,而无需提供 tLicences.purchase_id
?
数据库版本:PostgreSQL9.0
SQL 生成模式、表并填充这些表:
这段脚本运行时间有点长,因为我想要与我们实际数据规模相近的数据量。如果运行时间成问题,可以减少许可证数量 (30000) 和分配数量 (100000)。
-- Schema that holds every object created by this reproduction script.
CREATE SCHEMA test;

-- Purchases: one row per purchase, with its date and a soft-delete flag.
CREATE TABLE test.tPurchases (
    id      SERIAL  PRIMARY KEY,
    date_   DATE    NOT NULL,
    /* … */
    deleted BOOLEAN NOT NULL DEFAULT FALSE
);
-- Licences: each row belongs to exactly one purchase (purchase_id) and carries
-- a soft-delete flag.
CREATE TABLE test.tLicences (
    id          SERIAL  PRIMARY KEY,
    purchase_id INTEGER NOT NULL
        REFERENCES test.tPurchases (id)
            ON UPDATE RESTRICT
            ON DELETE RESTRICT,
    /* … */
    deleted     BOOLEAN NOT NULL DEFAULT FALSE
);

-- Supports lookups of licences by purchase.
CREATE INDEX ON test.tLicences (purchase_id);
-- Assignments of a licence over a date range. end_ is nullable, so an
-- assignment may have no end date.
CREATE TABLE test.tAssignments (
    id         SERIAL  PRIMARY KEY,
    licence_id INTEGER NOT NULL
        REFERENCES test.tLicences (id)
            ON UPDATE RESTRICT
            ON DELETE RESTRICT,
    start      DATE    NOT NULL,
    end_       DATE,
    /* … */
    deleted    BOOLEAN NOT NULL DEFAULT FALSE,
    -- An assignment cannot end before it starts; a NULL end_ passes the check.
    CHECK (start <= end_)
);

-- Supports lookups of assignments by licence.
CREATE INDEX ON test.tAssignments (licence_id);
-- Seed 10 purchases (ids 1..10), each dated at a random offset of up to one
-- year after 2000-01-01.
INSERT INTO test.tPurchases (id, date_)
SELECT
    purchase_no,
    TIMESTAMP '2000-01-01' + random() * INTERVAL '1 year' AS date_
FROM generate_series(1, 10) AS purchase_no;
-- Seed 30000 licences, each attached to a random purchase in 1..10; roughly
-- 1% of them are flagged as deleted.
INSERT INTO test.tLicences (id, purchase_id, deleted)
SELECT
    licence_no,
    trunc(random() * 10 + 1) AS purchase_id,
    (random() > 0.99) AS deleted
FROM generate_series(1, 30000) AS licence_no;
-- Seed 100000 assignments spread randomly over the 30000 licences.
-- Within a licence the assignments are ranked by id; the n-th one starts
-- 20*n .. 20*n+10 days after the purchase date and ends 20*n+10 .. 20*n+20
-- days after it. The highest-ranked assignment of a licence gets a NULL end_
-- about half of the time, and roughly 5% of all rows are flagged as deleted.
INSERT INTO test.tAssignments (id, licence_id, start, end_, deleted)
SELECT
    ranked.id,
    ranked.licence_id,
    tPurchases.date_ + ((ranked.seq_no * 20 + random() * 10) || ' days')::interval AS start,
    CASE
        WHEN (
            ranked.seq_no = max(ranked.seq_no) OVER (PARTITION BY ranked.licence_id)
            AND random() > 0.5
        ) THEN NULL
        ELSE tPurchases.date_ + ((ranked.seq_no * 20 + 10 + random() * 10) || ' days')::interval
    END AS end_,
    (random() > 0.95) AS deleted
FROM (
    -- Number each licence's assignments in id order.
    SELECT
        drawn.id,
        drawn.licence_id,
        rank() OVER (PARTITION BY drawn.licence_id ORDER BY drawn.id) AS seq_no
    FROM (
        -- Draw a random licence (1..30000) for every assignment id.
        SELECT
            assignment_no AS id,
            trunc(random() * 30000 + 1) AS licence_id
        FROM generate_series(1, 100000) AS assignment_no
    ) AS drawn
) AS ranked
    INNER JOIN test.tLicences
        ON tLicences.id = ranked.licence_id
    INNER JOIN test.tPurchases
        ON tPurchases.id = tLicences.purchase_id;
您可能需要先更新统计信息,但一般来说可以使用 CTE 来强制执行您想要的优化。这里我还把您所有的子查询都改写成了 CTE,只是为了看起来更清楚:
-- Licence 19 with its purchase and that purchase's assignment counters,
-- restructured as CTEs so that only the one relevant purchase is expanded.
--
-- Fixes over the previous revision of this query:
--   * myPurchases filtered on tLicences.id without having tLicences in its FROM
--     clause (invalid reference); the purchase is now looked up via a subquery.
--   * one CASE mixed an integer THEN with a boolean ELSE; it now uses ELSE 0.
--   * the final SELECT returned every licence of the purchase; it is now
--     limited to licence 19.
-- NOTE(review): the licence id 19 appears twice (myPurchases and the final
-- WHERE); change both together.
-- NOTE(review): the benefit relies on the planner evaluating myPurchases first;
-- confirm with EXPLAIN ANALYZE on the PostgreSQL version in use.
WITH myPurchases AS
(
    -- The single purchase that owns licence 19 (tLicences.id is the primary
    -- key, so the scalar subquery yields at most one row).
    SELECT tPurchases.*
    FROM test.tPurchases AS tPurchases
    WHERE tPurchases.id = (
        SELECT tLicences.purchase_id
        FROM test.tLicences
        WHERE tLicences.id = 19
    )
), periodsOfAvailability_start AS
(
    -- Branch 1: periods following an "older" assignment; they end the day
    -- before a "newer" assignment starts, or at 'infinity' when none is joined.
    SELECT
        tPurchases.id AS purchase_id,
        tPurchases.date_ AS purchase_date,
        tLicences.id AS licence_id,
        GREATEST(tPurchases.date_, older.end_, older.start + '1 day'::interval) AS licence_availability_start,
        CASE WHEN newer.id IS NULL THEN 'infinity' ELSE newer.start - '1 day'::interval END AS licence_availability_end,
        COALESCE(newer.start, 'infinity') AS licence_availability_uninstallBy,
        older.id AS assignment_older_id,
        older.start AS assignment_older_start,
        older.end_ AS assignment_older_end,
        newer.id AS assignment_newer_id,
        newer.start AS assignment_newer_start,
        newer.end_ AS assignment_newer_end
    FROM test.tLicences
        INNER JOIN myPurchases AS tPurchases ON tPurchases.id = tLicences.purchase_id
        LEFT JOIN test.tAssignments AS older ON (NOT older.deleted AND older.licence_id = tLicences.id)
        LEFT JOIN test.tAssignments AS newer ON (NOT newer.deleted AND newer.id <> older.id AND newer.licence_id = older.licence_id)
    WHERE NOT tLicences.deleted

    UNION

    -- Branch 2: the period from the purchase date up to the day before an
    -- assignment starts (the join matches every non-deleted assignment).
    SELECT
        tPurchases.id AS purchase_id,
        tPurchases.date_ AS purchase_date,
        tLicences.id AS licence_id,
        tPurchases.date_ AS licence_availability_start,
        oldest.start - '1 day'::interval AS licence_availability_end,
        oldest.start AS licence_availability_uninstallBy,
        null AS assignment_older_id,
        null AS assignment_older_start,
        null AS assignment_older_end,
        oldest.id AS assignment_newer_id,
        oldest.start AS assignment_newer_start,
        oldest.end_ AS assignment_newer_end
    FROM test.tLicences
        INNER JOIN myPurchases AS tPurchases ON tPurchases.id = tLicences.purchase_id
        INNER JOIN test.tAssignments AS oldest ON oldest.licence_id = tLicences.id
    WHERE NOT tLicences.deleted AND NOT oldest.deleted
), purchase_quantities_assignnments AS
(
    -- One row per purchase with the three counters.
    SELECT
        purchase_id,
        SUM(CASE WHEN assignment_newer_id IS NOT null THEN 1 ELSE 0 END) AS prchs_quantity_assigned,
        SUM(CASE WHEN assignment_newer_id IS null AND current_timestamp BETWEEN licence_availability_start AND licence_availability_end THEN 1 ELSE 0 END) AS prchs_quantity_notAssignedAndCanBeAssigned,
        SUM(CASE WHEN assignment_newer_id IS null AND current_timestamp < licence_availability_start THEN 1 ELSE 0 END) AS prchs_quantity_notAssignedAndCannotBeAssigned
    FROM periodsOfAvailability_start
    WHERE assignment_newer_id IS null OR assignment_newer_end IS null
    GROUP BY purchase_id
)
SELECT *
FROM myPurchases AS tPurchases
    INNER JOIN test.tLicences ON tLicences.purchase_id = tPurchases.id
    LEFT JOIN purchase_quantities_assignnments ON purchase_quantities_assignnments.purchase_id = tPurchases.id
-- Without this filter every licence of the purchase would be returned.
WHERE tLicences.id = 19;