在 plpgsql(PostgreSQL 的)中,CTE 可以保留到外循环吗?
In plpgsql (of PostgreSQL), can a CTE be preserved to an outer loop?
(根据原文编辑)
在 plpgsql (PostgreSQL 9.2) 中,我有一个函数定义为:
-- Sketch of the function from the question. Parts of the queries are elided with
-- dots ("....", "etc."), so this listing is illustrative and not runnable as written.
-- Declared parameters: patient_recid (integer), tencounter (timestamp without time zone).
-- Declared result: SETOF view_dx, produced one row at a time with RETURN NEXT.
CREATE OR REPLACE FUNCTION test (patient_recid integer, tencounter timestamp without time zone)
RETURNS SETOF view_dx AS
$BODY$
#variable_conflict use_column
DECLARE
r view_dx%rowtype;
BEGIN
-- Driving query: the person and alldx CTEs feed a UNION of several selects.
FOR r IN
With person AS (
select ....
)
, alldx AS (
select ....
)
............
select ... from first cte
union
select ... from second cte
union
etc., etc.,
LOOP
-- Each assignment below repeats the person/alldx CTE definitions inside its own
-- subquery ("SAME AS ABOVE"); this repetition is what the question asks how to avoid.
r.tposted = ( .
With person AS (
... SAME AS ABOVE,
alldx AS (
... SAME AS ABOVE,
)
select max(b.tposted)
from alldx b
where r.cicd9 = b.code and r.cdesc = b.cdesc);
r.treated = (
With person AS (
........SAME AS ABOVE )
, alldx AS (
........SAME AS ABOVE
)
select ...);
r.resolved = (
With person AS (
select p.chart_recid as recid
from patients p
where p.recid = patient_recid
)
...etc, etc,
-- Emit the filled-in row and continue with the next row of the driving query.
RETURN NEXT r;
END LOOP;
RETURN;
END
$BODY$
LANGUAGE plpgsql VOLATILE
COST 100
ROWS 1000;
ALTER FUNCTION test(integer, timestamp without time zone)
OWNER TO postgres;
编辑:本质上,我定义了多个 CTE,它们在带有多个 UNION 的 "FOR r IN" 查询部分中运行良好,但是在执行 LOOP...END LOOP 部分时,每个 SELECT 语句都必须重新定义这些 CTE。有什么好的方法可以避免对同一 CTE 进行多次定义?
或者是否有更好(即更快)的方法。
欢迎和赞赏所有建议。
TIA
[这不是答案(信息太少,程序太大),而是重写堆栈CTE的提示。]
联合的成员似乎都基于 select b.* from alldx b
,都有一些不同的额外条件,主要是基于同一 CTE 中其他元组的存在。我的建议是统一这些,用布尔标志替换它们,如:
-- One row per diagnosis posted on or before the encounter date. The separate
-- UNION branches of the original query are expressed here as boolean flag columns.
-- patient_recid and tencounter are presumably the parameters of the enclosing
-- plpgsql function; this query is a fragment and does not run on its own.
WITH person AS (
    -- chart of the requested patient
    SELECT p.chart_recid AS recid
    FROM patients p
    WHERE p.recid = patient_recid
)
, alldx AS (
    -- every diagnosis on that chart posted on or before the encounter date
    SELECT d.tposted, d.treated, d.resolved, d.recid AS dx_recid, d.pmh, d.icd9_recid
        , i.code, i.cdesc, i.chronic
    FROM dx d
    JOIN icd9 i ON d.icd9_recid = i.recid
    JOIN person p ON d.chart_recid = p.recid
    WHERE d.tposted::date <= tencounter::date
)
SELECT uni.tposted, uni.treated, uni.resolved, uni.dx_recid, uni.pmh, uni.icd9_recid
    , uni.code, uni.cdesc, uni.chronic
    , (uni.tposted::date = tencounter::date
      ) AS is_dx_at_encounter -- bitfield
    , EXISTS ( -- a record of the same problem, posted at the same time or later, has resolved it
        SELECT 1
        FROM alldx x
        WHERE x.resolved = true
        AND uni.code = x.code AND uni.cdesc = x.cdesc
        -- no "uni.tposted = x.tposted" here: it would restrict the check to same-timestamp rows
        AND x.tposted >= uni.tposted
      ) AS dx_resolved -- bitfield
    , EXISTS ( -- a strictly later record of the same problem is marked as not resolved
        SELECT 1
        FROM alldx x
        WHERE x.resolved = false
        AND uni.code = x.code AND uni.cdesc = x.cdesc
        -- combining "=" with ">" on tposted could never match, so only ">" is kept
        AND x.tposted > uni.tposted
      ) AS dx_recurred -- bitfield
    , EXISTS ( -- any record of the same problem is flagged chronic
        SELECT 1
        FROM alldx x
        WHERE x.chronic = true
        AND uni.code = x.code AND uni.cdesc = x.cdesc
      ) AS dx_chronic -- bitfield
-- etcetera
FROM alldx uni
;
person
CTE 也可能被合并。
- 也许你甚至不需要最后的循环
- 但您必须找出需要生成的位域的哪些组合。
- 原文中的
UNION
(没有ALL
)是一个可怕的野兽:它收集联合各部分的所有结果,但必须删除重复项。这可能会引入一个排序步骤,因为 CTE 引用往往会对调用它的查询隐藏其键字段或隐含的排序。
据我所知,在 LOOP 之前定义的 CTE 不会延续到 LOOP 内部。但是,可以在 BEGIN 块中定义一个临时表,该表在 LOOP 块中同样可用。以下解决方案的运行速度比我的原始代码快 50 倍。有人有更好的方法吗?
-- test2(patient_recid, tencounter): returns one view_dx row per distinct (code, cdesc)
-- diagnosis that is either posted on the encounter date, recurrent, never resolved,
-- or chronic. The shared diagnosis set is materialised once in the temp table all_dx
-- so that both the driving query and the per-row lookups can read it.
--   patient_recid  matched against patients.recid
--   tencounter     encounter timestamp; only its date part is compared
-- NOTE(review): the driving query yields (code, cdesc) and the loop reads r.cicd9 and
-- r.cdesc, so view_dx presumably starts with those two columns - confirm.
CREATE OR REPLACE FUNCTION test2 (patient_recid integer, tencounter timestamp without time zone)
RETURNS SETOF view_dx AS
$BODY$
#variable_conflict use_column
DECLARE
    r view_dx%rowtype;
BEGIN
    -- ON COMMIT DROP only removes the table at the end of the transaction, so a second
    -- call in the same transaction would fail on CREATE. Remove any leftover first;
    -- pg_temp restricts the drop to this session's temporary schema.
    DROP TABLE IF EXISTS pg_temp.all_dx;

    -- Every diagnosis on the patient's chart posted on or before the encounter date.
    CREATE TEMP TABLE all_dx ON COMMIT DROP AS
    WITH person AS (
        SELECT p.chart_recid AS recid
        FROM patients p
        WHERE p.recid = patient_recid
    )
    , alldx AS (
        SELECT d.tposted, d.treated, d.resolved, d.recid AS dx_recid, d.pmh, d.icd9_recid,
               i.code, i.cdesc, i.chronic
        FROM dx d
        JOIN icd9 i ON (d.icd9_recid = i.recid)
        JOIN person p ON (d.chart_recid = p.recid)
        WHERE d.tposted::date <= tencounter::date
    )
    SELECT tposted, treated, resolved, dx_recid, pmh, icd9_recid, code, cdesc, chronic
    FROM alldx
    ORDER BY tposted DESC;

    -- Loop over the distinct (code, cdesc) pairs produced by the unions and fill in
    -- tposted, treated, resolved, pmh and chronic for each of them.
    FOR r IN
        WITH
        dx_at_encounter AS ( -- all diagnoses posted on the encounter date
            SELECT a.code, a.cdesc
            FROM all_dx a
            WHERE a.tposted::date = tencounter::date
        )
        , dx_resolved AS ( -- most recent resolved record of every resolved problem
            SELECT b.*
            FROM all_dx b
            JOIN (
                SELECT a.code, a.cdesc, max(a.tposted) AS tposted
                FROM all_dx a
                WHERE a.resolved = true
                GROUP BY a.code, a.cdesc
            ) j ON (b.code = j.code AND b.cdesc = j.cdesc AND b.tposted = j.tposted)
        )
        , never_resolved AS ( -- problems with no resolved record up to the encounter date
            SELECT b.code, b.cdesc
            FROM all_dx b
            WHERE NOT EXISTS (
                SELECT 1
                FROM dx_resolved d
                WHERE b.code = d.code AND b.cdesc = d.cdesc
            )
        )
        , recurrent AS ( -- problems posted again, unresolved, after their latest resolution
            SELECT b.code, b.cdesc
            FROM all_dx b
            -- Match on code as well as cdesc, like every other lookup in this function.
            -- The alias is not "r", so it cannot be confused with the loop record r.
            JOIN dx_resolved res ON (b.code = res.code
                                     AND b.cdesc = res.cdesc
                                     AND b.tposted::date > res.tposted::date)
            WHERE (b.resolved IS NULL OR b.resolved = false)
        )
        , chronic_dx AS ( -- problems flagged chronic
            SELECT b.code, b.cdesc
            FROM all_dx b
            WHERE b.chronic = true
        )
        -- UNION (not UNION ALL) is intentional: each (code, cdesc) pair must appear once.
        SELECT a.code, a.cdesc FROM dx_at_encounter a
        UNION
        SELECT a.code, a.cdesc FROM recurrent a
        UNION
        SELECT a.code, a.cdesc FROM never_resolved a
        UNION
        SELECT a.code, a.cdesc FROM chronic_dx a
    LOOP
        -- Latest posting of this diagnosis together with its treated/resolved state,
        -- fetched in one lookup instead of three. all_dx holds no NULL tposted (the
        -- "<=" filter above rejects them), so the first row by tposted DESC is the
        -- max(tposted) row. LIMIT 1 with the dx_recid tie-break avoids a
        -- "more than one row" error when several rows share the latest tposted.
        SELECT b.tposted, b.treated, b.resolved
        INTO r.tposted, r.treated, r.resolved
        FROM all_dx b
        WHERE b.code = r.cicd9 AND b.cdesc = r.cdesc
        ORDER BY b.tposted DESC, b.dx_recid DESC
        LIMIT 1;

        -- true when any record of this diagnosis is flagged pmh, otherwise NULL
        r.pmh := (
            SELECT DISTINCT true
            FROM all_dx b
            WHERE b.pmh = true
              AND b.code = r.cicd9
              AND b.cdesc = r.cdesc);

        -- true when any record of this diagnosis is flagged chronic, otherwise NULL
        r.chronic := (
            SELECT DISTINCT true
            FROM all_dx b
            WHERE b.chronic = true
              AND b.code = r.cicd9
              AND b.cdesc = r.cdesc);

        RETURN NEXT r; -- append the completed row to the result set
    END LOOP;
    RETURN;
END
$BODY$
LANGUAGE plpgsql VOLATILE
COST 100
ROWS 1000;
ALTER FUNCTION test2(integer, timestamp without time zone)
OWNER TO postgres;
(根据原文编辑)
在 plpgsql (PostgreSQL 9.2) 中,我有一个函数定义为:
-- Sketch of the function from the question. Parts of the queries are elided with
-- dots ("....", "etc."), so this listing is illustrative and not runnable as written.
-- Declared parameters: patient_recid (integer), tencounter (timestamp without time zone).
-- Declared result: SETOF view_dx, produced one row at a time with RETURN NEXT.
CREATE OR REPLACE FUNCTION test (patient_recid integer, tencounter timestamp without time zone)
RETURNS SETOF view_dx AS
$BODY$
#variable_conflict use_column
DECLARE
r view_dx%rowtype;
BEGIN
-- Driving query: the person and alldx CTEs feed a UNION of several selects.
FOR r IN
With person AS (
select ....
)
, alldx AS (
select ....
)
............
select ... from first cte
union
select ... from second cte
union
etc., etc.,
LOOP
-- Each assignment below repeats the person/alldx CTE definitions inside its own
-- subquery ("SAME AS ABOVE"); this repetition is what the question asks how to avoid.
r.tposted = ( .
With person AS (
... SAME AS ABOVE,
alldx AS (
... SAME AS ABOVE,
)
select max(b.tposted)
from alldx b
where r.cicd9 = b.code and r.cdesc = b.cdesc);
r.treated = (
With person AS (
........SAME AS ABOVE )
, alldx AS (
........SAME AS ABOVE
)
select ...);
r.resolved = (
With person AS (
select p.chart_recid as recid
from patients p
where p.recid = patient_recid
)
...etc, etc,
-- Emit the filled-in row and continue with the next row of the driving query.
RETURN NEXT r;
END LOOP;
RETURN;
END
$BODY$
LANGUAGE plpgsql VOLATILE
COST 100
ROWS 1000;
ALTER FUNCTION test(integer, timestamp without time zone)
OWNER TO postgres;
编辑:本质上,我定义了多个 CTE,它们在带有多个 UNION 的 "FOR r IN" 查询部分中运行良好,但是在执行 LOOP...END LOOP 部分时,每个 SELECT 语句都必须重新定义这些 CTE。有什么好的方法可以避免对同一 CTE 进行多次定义?
或者是否有更好(即更快)的方法。
欢迎和赞赏所有建议。
TIA
[这不是答案(信息太少,程序太大),而是重写堆栈CTE的提示。]
联合的成员似乎都基于 select b.* from alldx b
,都有一些不同的额外条件,主要是基于同一 CTE 中其他元组的存在。我的建议是统一这些,用布尔标志替换它们,如:
-- One row per diagnosis posted on or before the encounter date. The separate
-- UNION branches of the original query are expressed here as boolean flag columns.
-- patient_recid and tencounter are presumably the parameters of the enclosing
-- plpgsql function; this query is a fragment and does not run on its own.
WITH person AS (
    -- chart of the requested patient
    SELECT p.chart_recid AS recid
    FROM patients p
    WHERE p.recid = patient_recid
)
, alldx AS (
    -- every diagnosis on that chart posted on or before the encounter date
    SELECT d.tposted, d.treated, d.resolved, d.recid AS dx_recid, d.pmh, d.icd9_recid
        , i.code, i.cdesc, i.chronic
    FROM dx d
    JOIN icd9 i ON d.icd9_recid = i.recid
    JOIN person p ON d.chart_recid = p.recid
    WHERE d.tposted::date <= tencounter::date
)
SELECT uni.tposted, uni.treated, uni.resolved, uni.dx_recid, uni.pmh, uni.icd9_recid
    , uni.code, uni.cdesc, uni.chronic
    , (uni.tposted::date = tencounter::date
      ) AS is_dx_at_encounter -- bitfield
    , EXISTS ( -- a record of the same problem, posted at the same time or later, has resolved it
        SELECT 1
        FROM alldx x
        WHERE x.resolved = true
        AND uni.code = x.code AND uni.cdesc = x.cdesc
        -- no "uni.tposted = x.tposted" here: it would restrict the check to same-timestamp rows
        AND x.tposted >= uni.tposted
      ) AS dx_resolved -- bitfield
    , EXISTS ( -- a strictly later record of the same problem is marked as not resolved
        SELECT 1
        FROM alldx x
        WHERE x.resolved = false
        AND uni.code = x.code AND uni.cdesc = x.cdesc
        -- combining "=" with ">" on tposted could never match, so only ">" is kept
        AND x.tposted > uni.tposted
      ) AS dx_recurred -- bitfield
    , EXISTS ( -- any record of the same problem is flagged chronic
        SELECT 1
        FROM alldx x
        WHERE x.chronic = true
        AND uni.code = x.code AND uni.cdesc = x.cdesc
      ) AS dx_chronic -- bitfield
-- etcetera
FROM alldx uni
;
person
CTE 也可能被合并。- 也许你甚至不需要最后的循环
- 但您必须找出需要生成的位域的哪些组合。
- 原文中的
UNION
(没有ALL
)是一个可怕的野兽:它收集联合各部分的所有结果,但必须删除重复项。这可能会引入一个排序步骤,因为 CTE 引用往往会对调用它的查询隐藏其键字段或隐含的排序。
据我所知,在 LOOP 之前定义的 CTE 不会延续到 LOOP 内部。但是,可以在 BEGIN 块中定义一个临时表,该表在 LOOP 块中同样可用。以下解决方案的运行速度比我的原始代码快 50 倍。有人有更好的方法吗?
-- test2(patient_recid, tencounter): returns one view_dx row per distinct (code, cdesc)
-- diagnosis that is either posted on the encounter date, recurrent, never resolved,
-- or chronic. The shared diagnosis set is materialised once in the temp table all_dx
-- so that both the driving query and the per-row lookups can read it.
--   patient_recid  matched against patients.recid
--   tencounter     encounter timestamp; only its date part is compared
-- NOTE(review): the driving query yields (code, cdesc) and the loop reads r.cicd9 and
-- r.cdesc, so view_dx presumably starts with those two columns - confirm.
CREATE OR REPLACE FUNCTION test2 (patient_recid integer, tencounter timestamp without time zone)
RETURNS SETOF view_dx AS
$BODY$
#variable_conflict use_column
DECLARE
    r view_dx%rowtype;
BEGIN
    -- ON COMMIT DROP only removes the table at the end of the transaction, so a second
    -- call in the same transaction would fail on CREATE. Remove any leftover first;
    -- pg_temp restricts the drop to this session's temporary schema.
    DROP TABLE IF EXISTS pg_temp.all_dx;

    -- Every diagnosis on the patient's chart posted on or before the encounter date.
    CREATE TEMP TABLE all_dx ON COMMIT DROP AS
    WITH person AS (
        SELECT p.chart_recid AS recid
        FROM patients p
        WHERE p.recid = patient_recid
    )
    , alldx AS (
        SELECT d.tposted, d.treated, d.resolved, d.recid AS dx_recid, d.pmh, d.icd9_recid,
               i.code, i.cdesc, i.chronic
        FROM dx d
        JOIN icd9 i ON (d.icd9_recid = i.recid)
        JOIN person p ON (d.chart_recid = p.recid)
        WHERE d.tposted::date <= tencounter::date
    )
    SELECT tposted, treated, resolved, dx_recid, pmh, icd9_recid, code, cdesc, chronic
    FROM alldx
    ORDER BY tposted DESC;

    -- Loop over the distinct (code, cdesc) pairs produced by the unions and fill in
    -- tposted, treated, resolved, pmh and chronic for each of them.
    FOR r IN
        WITH
        dx_at_encounter AS ( -- all diagnoses posted on the encounter date
            SELECT a.code, a.cdesc
            FROM all_dx a
            WHERE a.tposted::date = tencounter::date
        )
        , dx_resolved AS ( -- most recent resolved record of every resolved problem
            SELECT b.*
            FROM all_dx b
            JOIN (
                SELECT a.code, a.cdesc, max(a.tposted) AS tposted
                FROM all_dx a
                WHERE a.resolved = true
                GROUP BY a.code, a.cdesc
            ) j ON (b.code = j.code AND b.cdesc = j.cdesc AND b.tposted = j.tposted)
        )
        , never_resolved AS ( -- problems with no resolved record up to the encounter date
            SELECT b.code, b.cdesc
            FROM all_dx b
            WHERE NOT EXISTS (
                SELECT 1
                FROM dx_resolved d
                WHERE b.code = d.code AND b.cdesc = d.cdesc
            )
        )
        , recurrent AS ( -- problems posted again, unresolved, after their latest resolution
            SELECT b.code, b.cdesc
            FROM all_dx b
            -- Match on code as well as cdesc, like every other lookup in this function.
            -- The alias is not "r", so it cannot be confused with the loop record r.
            JOIN dx_resolved res ON (b.code = res.code
                                     AND b.cdesc = res.cdesc
                                     AND b.tposted::date > res.tposted::date)
            WHERE (b.resolved IS NULL OR b.resolved = false)
        )
        , chronic_dx AS ( -- problems flagged chronic
            SELECT b.code, b.cdesc
            FROM all_dx b
            WHERE b.chronic = true
        )
        -- UNION (not UNION ALL) is intentional: each (code, cdesc) pair must appear once.
        SELECT a.code, a.cdesc FROM dx_at_encounter a
        UNION
        SELECT a.code, a.cdesc FROM recurrent a
        UNION
        SELECT a.code, a.cdesc FROM never_resolved a
        UNION
        SELECT a.code, a.cdesc FROM chronic_dx a
    LOOP
        -- Latest posting of this diagnosis together with its treated/resolved state,
        -- fetched in one lookup instead of three. all_dx holds no NULL tposted (the
        -- "<=" filter above rejects them), so the first row by tposted DESC is the
        -- max(tposted) row. LIMIT 1 with the dx_recid tie-break avoids a
        -- "more than one row" error when several rows share the latest tposted.
        SELECT b.tposted, b.treated, b.resolved
        INTO r.tposted, r.treated, r.resolved
        FROM all_dx b
        WHERE b.code = r.cicd9 AND b.cdesc = r.cdesc
        ORDER BY b.tposted DESC, b.dx_recid DESC
        LIMIT 1;

        -- true when any record of this diagnosis is flagged pmh, otherwise NULL
        r.pmh := (
            SELECT DISTINCT true
            FROM all_dx b
            WHERE b.pmh = true
              AND b.code = r.cicd9
              AND b.cdesc = r.cdesc);

        -- true when any record of this diagnosis is flagged chronic, otherwise NULL
        r.chronic := (
            SELECT DISTINCT true
            FROM all_dx b
            WHERE b.chronic = true
              AND b.code = r.cicd9
              AND b.cdesc = r.cdesc);

        RETURN NEXT r; -- append the completed row to the result set
    END LOOP;
    RETURN;
END
$BODY$
LANGUAGE plpgsql VOLATILE
COST 100
ROWS 1000;
ALTER FUNCTION test2(integer, timestamp without time zone)
OWNER TO postgres;