postgresql 忽略递归查询的索引
postgresql ignores index for recursive query
我有一个 table 表示层次结构链接图(parent_id、child_id)
table 有父级、子级和两者的索引。
该图可能包含循环,我需要检查它们(或者,也许我需要找到所有循环以消除它们)。
而且我需要追溯查询一个节点的所有父节点。
为此我使用这个查询(它应该保存在视图中):
WITH RECURSIVE recursion(parent_id, child_id, node_id, path) AS (
SELECT h.parent_id,
h.child_id,
h.child_id AS node_id,
ARRAY[h.parent_id, h.child_id] AS path
FROM hierarchy h
UNION ALL
SELECT h.parent_id,
h.child_id,
r.node_id,
ARRAY[h.parent_id] || r.path
FROM hierarchy h JOIN recursion r ON h.child_id = r.parent_id
WHERE NOT r.path @> ARRAY[h.parent_id]
)
SELECT parent_id,
child_id,
node_id,
path
FROM recursion
where node_id = 883
对于这个查询,postgres 将使用非常棒的计划:
"CTE Scan on recursion (cost=2703799682.88..4162807558.70 rows=324223972 width=56)"
" Filter: (node_id = 883)"
" CTE recursion"
" -> Recursive Union (cost=0.00..2703799682.88 rows=64844794481 width=56)"
" -> Seq Scan on hierarchy h (cost=0.00..74728.61 rows=4210061 width=56)"
" -> Merge Join (cost=10058756.99..140682906.47 rows=6484058442 width=56)"
" Merge Cond: (h_1.child_id = r.parent_id)"
" Join Filter: (NOT (r.path @> ARRAY[h_1.parent_id]))"
" -> Index Scan using hierarchy_idx_child on hierarchy h_1 (cost=0.43..256998.25 rows=4210061 width=16)"
" -> Materialize (cost=10058756.56..10269259.61 rows=42100610 width=48)"
" -> Sort (cost=10058756.56..10164008.08 rows=42100610 width=48)"
" Sort Key: r.parent_id"
" -> WorkTable Scan on recursion r (cost=0.00..842012.20 rows=42100610 width=48)"
似乎 postgres 不理解 node_id 上的外部过滤器应用于第一个递归子查询中的 child_id。
我想我做错了。但具体在哪里?
看来您只需要将 WHERE node_id = 883
移动到并集的第一部分:
WITH RECURSIVE recursion(parent_id, child_id, node_id, path) AS (
SELECT h.parent_id,
h.child_id,
h.child_id AS node_id,
ARRAY[h.parent_id, h.child_id] AS path
FROM hierarchy h
WHERE node_id = 883
UNION ALL
SELECT h.parent_id,
h.child_id,
r.node_id,
ARRAY[h.parent_id] || r.path
FROM hierarchy h JOIN recursion r ON h.child_id = r.parent_id
WHERE NOT r.path @> ARRAY[h.parent_id]
)
SELECT parent_id,
child_id,
node_id,
path
FROM recursion
这是解决图遍历任务的更有效的方法。
CREATE OR REPLACE FUNCTION public.terr_ancestors(IN bigint)
RETURNS TABLE(node_id bigint, depth integer, path bigint[]) AS
$BODY$
WITH RECURSIVE recursion(node_id, depth, path) AS (
SELECT as node_id, 0, ARRAY[] AS path
UNION ALL
SELECT h.parent_id as node_id, r.depth + 1, h.parent_id || r.path
FROM hierarchy h JOIN recursion r ON h.child_id = r.node_id
WHERE h.parent_id != ANY(path)
)
SELECT * FROM recursion
$BODY$
后代也采用类似的方式。
我有一个 table 表示层次结构链接图(parent_id、child_id) table 有父级、子级和两者的索引。 该图可能包含循环,我需要检查它们(或者,也许我需要找到所有循环以消除它们)。
而且我需要追溯查询一个节点的所有父节点。 为此我使用这个查询(它应该保存在视图中):
WITH RECURSIVE recursion(parent_id, child_id, node_id, path) AS (
SELECT h.parent_id,
h.child_id,
h.child_id AS node_id,
ARRAY[h.parent_id, h.child_id] AS path
FROM hierarchy h
UNION ALL
SELECT h.parent_id,
h.child_id,
r.node_id,
ARRAY[h.parent_id] || r.path
FROM hierarchy h JOIN recursion r ON h.child_id = r.parent_id
WHERE NOT r.path @> ARRAY[h.parent_id]
)
SELECT parent_id,
child_id,
node_id,
path
FROM recursion
where node_id = 883
对于这个查询,postgres 将使用非常棒的计划:
"CTE Scan on recursion (cost=2703799682.88..4162807558.70 rows=324223972 width=56)"
" Filter: (node_id = 883)"
" CTE recursion"
" -> Recursive Union (cost=0.00..2703799682.88 rows=64844794481 width=56)"
" -> Seq Scan on hierarchy h (cost=0.00..74728.61 rows=4210061 width=56)"
" -> Merge Join (cost=10058756.99..140682906.47 rows=6484058442 width=56)"
" Merge Cond: (h_1.child_id = r.parent_id)"
" Join Filter: (NOT (r.path @> ARRAY[h_1.parent_id]))"
" -> Index Scan using hierarchy_idx_child on hierarchy h_1 (cost=0.43..256998.25 rows=4210061 width=16)"
" -> Materialize (cost=10058756.56..10269259.61 rows=42100610 width=48)"
" -> Sort (cost=10058756.56..10164008.08 rows=42100610 width=48)"
" Sort Key: r.parent_id"
" -> WorkTable Scan on recursion r (cost=0.00..842012.20 rows=42100610 width=48)"
似乎 postgres 不理解 node_id 上的外部过滤器应用于第一个递归子查询中的 child_id。
我想我做错了。但具体在哪里?
看来您只需要将 WHERE node_id = 883
移动到并集的第一部分:
WITH RECURSIVE recursion(parent_id, child_id, node_id, path) AS (
SELECT h.parent_id,
h.child_id,
h.child_id AS node_id,
ARRAY[h.parent_id, h.child_id] AS path
FROM hierarchy h
WHERE node_id = 883
UNION ALL
SELECT h.parent_id,
h.child_id,
r.node_id,
ARRAY[h.parent_id] || r.path
FROM hierarchy h JOIN recursion r ON h.child_id = r.parent_id
WHERE NOT r.path @> ARRAY[h.parent_id]
)
SELECT parent_id,
child_id,
node_id,
path
FROM recursion
这是解决图遍历任务的更有效的方法。
CREATE OR REPLACE FUNCTION public.terr_ancestors(IN bigint)
RETURNS TABLE(node_id bigint, depth integer, path bigint[]) AS
$BODY$
WITH RECURSIVE recursion(node_id, depth, path) AS (
SELECT as node_id, 0, ARRAY[] AS path
UNION ALL
SELECT h.parent_id as node_id, r.depth + 1, h.parent_id || r.path
FROM hierarchy h JOIN recursion r ON h.child_id = r.node_id
WHERE h.parent_id != ANY(path)
)
SELECT * FROM recursion
$BODY$
后代也采用类似的方式。