在 PostgreSQL 的 plpgsql 中,CTE 可以保留到外层循环中使用吗?

In plpgsql (of PostgreSQL), can a CTE be preserved to an outer loop?

(根据原文编辑)

在 plpgsql (PostgreSQL 9.2) 中,我有一个函数定义为:

-- test(patient_recid, tencounter): set-returning PL/pgSQL function that yields view_dx rows.
--
-- Shape of the function as posted in the question:
--   1. A FOR loop iterates over a UNION of selects built on the CTEs "person" and "alldx".
--   2. For every row, r.tposted / r.treated / r.resolved are recomputed with scalar
--      subqueries, each of which re-declares the same CTEs (the duplication the
--      question is about).
--   3. Each completed row is emitted with RETURN NEXT.
--
-- NOTE: the "....", "SAME AS ABOVE" and "etc, etc" fragments are elisions from the
-- original post; this listing is illustrative and not runnable as written.
--
-- The "#variable_conflict use_column" directive makes PL/pgSQL resolve a name that
-- could be either a variable or a table column as the column.
CREATE OR REPLACE FUNCTION test (patient_recid integer, tencounter timestamp without time zone)
  RETURNS SETOF view_dx AS
$BODY$ 

#variable_conflict use_column

DECLARE
    -- Row buffer: loop target of the FOR query and the value passed to RETURN NEXT.
    r view_dx%rowtype;

BEGIN

-- Driving query: CTE definitions followed by a UNION of selects over them (elided).
FOR r IN 

With person AS (
    select ....
   )
, alldx AS (
    select ....
)
............

select  ... from first cte 
union 
select ... from second cte
union
etc., etc.,  

LOOP
    -- Latest tposted in alldx for the current row's code/description;
    -- the CTEs have to be declared again because this is a separate statement.
    r.tposted = (       .
            With person AS (
                ... SAME AS ABOVE, 
                      alldx AS (
                ... SAME AS ABOVE,
            )
            select max(b.tposted)
            from alldx b
            where r.cicd9 = b.code and r.cdesc = b.cdesc);

    -- Same CTEs declared a third time for the "treated" lookup (select elided).
    r.treated = (   
                With person AS (
                ........SAME AS ABOVE           )
                , alldx AS (
                ........SAME AS ABOVE   
                )
                select ...);

    -- And a fourth time for the "resolved" lookup; person maps the patient's
    -- recid to its chart_recid.
    r.resolved =  (     
                With person AS (
                select p.chart_recid as recid
                from patients p
                where p.recid = patient_recid
                )
                ...etc, etc, 

     -- Append the completed row to the function's result set.
     RETURN NEXT r; 

END LOOP;

RETURN;

END

$BODY$
  LANGUAGE plpgsql VOLATILE
  COST 100
  ROWS 1000;
ALTER FUNCTION test(integer, timestamp without time zone)
  OWNER TO postgres;

编辑:本质上,我定义了多个 CTE,它们在包含多个 UNION 的 "FOR r IN" 部分中运行良好;但是在执行 LOOP...END LOOP 部分时,每个 SELECT 语句都需要重新定义这些 CTE。有什么好的方法可以避免对同一个 CTE 进行多次定义?

或者是否有更好(即更快)的方法。

欢迎和赞赏所有建议。

TIA

[这不是一个完整的答案(信息太少,程序太大),而是关于如何重写这一堆 CTE 的提示。]

联合的成员似乎都基于 select b.* from alldx b,都有一些不同的额外条件,主要是基于同一 CTE 中其他元组的存在。我的建议是统一这些,用布尔标志替换它们,如:

-- One row per diagnosis record of the patient, posted on or before the encounter
-- date, with boolean flags replacing the separate UNION branches of the original.
-- patient_recid and tencounter are the parameters of the enclosing function.
WITH person AS (
    -- Chart record id of the requested patient.
    SELECT p.chart_recid as recid
    FROM patients p
    WHERE p.recid = patient_recid
)
, alldx AS (
    -- Every diagnosis on that chart posted no later than the encounter date,
    -- joined to its ICD-9 code, description and chronic flag.
    SELECT d.tposted, d.treated, d.resolved, d.recid as dx_recid, d.pmh, d.icd9_recid
        , i.code, i.cdesc, i.chronic
    FROM dx d
    JOIN icd9 i ON d.icd9_recid = i.recid
    JOIN person p ON d.chart_recid = p.recid
    WHERE d.tposted::date <= tencounter::date
)
SELECT uni.tposted, uni.treated, uni.resolved, uni.dx_recid, uni.pmh, uni.icd9_recid
        , uni.code, uni.cdesc, uni.chronic
        , (uni.tposted::date = tencounter::date
                ) AS is_dx_at_encounter -- bitfield
        , EXISTS ( -- a record posted at the same time or later has resolved this problem.
                SELECT 1
                FROM alldx x
                WHERE x.resolved = true
                AND  uni.code = x.code AND uni.cdesc = x.cdesc
                -- no "uni.tposted = x.tposted" here: it would restrict the match
                -- to records with the identical timestamp only
                AND x.tposted >= uni.tposted
                ) AS dx_resolved -- bitfield
        , EXISTS ( -- a strictly later record reports this problem as unresolved again.
                SELECT 1
                FROM alldx x
                WHERE x.resolved = false
                AND  uni.code = x.code AND uni.cdesc = x.cdesc
                -- combining "=" and ">" on tposted can never be true, so only
                -- the strict "later than" comparison is kept
                AND x.tposted > uni.tposted
                ) AS dx_recurred -- bitfield
        , EXISTS ( -- any record of the same diagnosis is flagged chronic.
                SELECT * from alldx x where x.chronic = true
                AND uni.code = x.code AND uni.cdesc = x.cdesc
                ) AS dx_chronic -- bitfield
        -- etcetera
FROM alldx uni
        ;
  • person CTE 也可能被合并。
  • 也许你甚至不需要最后的循环
  • 但您必须找出需要生成的位域的哪些组合。
  • 原文中的 UNION(没有 ALL)是一个可怕的野兽:它收集各个联合分支的所有结果,但还必须删除重复项。这很可能会引入一个排序步骤,因为 CTE 引用往往会对调用它的查询隐藏其键字段或隐含的排序。

据我所知,在 LOOP 之前定义的 CTE 不会带入 LOOP 内部。但是,可以在 BEGIN 块中创建一个临时表,它在 LOOP 块中同样可用。以下解决方案的运行速度比我的原始代码快 50 倍。有人有更好的方法吗?

-- test2(patient_recid, tencounter): returns one view_dx row per distinct
-- (code, cdesc) diagnosis selected for a patient as of an encounter.
--
-- Parameters:
--   patient_recid  patients.recid of the patient whose chart is examined.
--   tencounter     encounter timestamp; only dx rows whose tposted date is on or
--                  before the encounter date are considered.
--
-- How it works:
--   1. The patient's diagnoses are materialised once into the temp table all_dx,
--      so the loop body can query them without re-declaring the CTEs.
--   2. A UNION of four selections (posted on the encounter date, recurrent,
--      never resolved, chronic) drives the FOR loop.
--   3. For each diagnosis the loop fills tposted, treated, resolved, pmh and
--      chronic from all_dx and emits the row.
--
-- "#variable_conflict use_column" makes names that could be either a PL/pgSQL
-- variable or a table column resolve to the column.
CREATE OR REPLACE FUNCTION test2 (patient_recid integer, tencounter timestamp without time zone)
  RETURNS SETOF view_dx AS
$BODY$ 

#variable_conflict use_column



DECLARE
    -- Row buffer: target of the FOR query and the value passed to RETURN NEXT.
    -- The driving query supplies only (code, cdesc); the loop body reads them back
    -- as r.cicd9 and r.cdesc -- presumably the first two columns of view_dx
    -- (TODO confirm against the view definition).
    r view_dx%rowtype;


BEGIN
    -- all_dx is created ON COMMIT DROP, i.e. it survives until the end of the
    -- transaction. Without this guard a second call inside the same transaction
    -- fails with "relation all_dx already exists". Only a table in this session's
    -- own temp schema is dropped, so a permanent table named all_dx is never touched.
    IF EXISTS (
        SELECT 1
        FROM pg_catalog.pg_class c
        WHERE c.relname = 'all_dx'
          AND c.relnamespace = pg_catalog.pg_my_temp_schema()
    ) THEN
        DROP TABLE pg_temp.all_dx;
    END IF;

    -- Materialise the patient's diagnoses up to the encounter date once.
    Create temp table all_dx ON COMMIT DROP AS
    With person AS (
        -- chart record id of the requested patient
        select p.chart_recid as recid
        from patients p
        where p.recid = patient_recid
        )
       , alldx AS (
        -- diagnoses on that chart joined to their ICD-9 code/description
        select d.tposted, d.treated, d.resolved, d.recid as dx_recid, d.pmh, d.icd9_recid, i.code, i.cdesc, i.chronic
        from dx d
        join icd9 i on (d.icd9_recid = i.recid)
        join person p on (d.chart_recid = p.recid)
        where d.tposted::date <= tencounter::date
        )
      select * from alldx order by tposted desc;    

-- Loop through all the records produced by the unions and assign tposted, pmh, chronic, etc.
FOR r IN 

With 
dx_at_encounter AS (        -- all diagnoses posted on the encounter date
    select code, cdesc from all_dx a
    where a.tposted::date = tencounter::date
)
, dx_resolved AS (              -- most recent resolved record of every resolved problem
    select b.* from all_dx b
    join (
        select a.code, a.cdesc , max(tposted) as tposted
        from all_dx a
        where a.resolved = true 
        group by code,cdesc) j
    on (b.code = j.code and b.cdesc = j.cdesc and b.tposted = j.tposted)    
)
, never_resolved AS (       -- all problems that have never been resolved before the time of encounter
                -- "not exists" is applied to each select output row AFTER the output row b.* is formed.
    select b.code, b.cdesc from all_dx b
    where not exists 
        (select 1 
         from dx_resolved d
         where b.code = d.code and b.cdesc = d.cdesc) 
)
, recurrent AS (        -- all recurrent problems (current again after having been resolved)
    -- The alias r below is the dx_resolved row, not the PL/pgSQL variable r;
    -- this relies on #variable_conflict use_column.
    -- NOTE(review): the join matches on cdesc only, while every other comparison
    -- in this function matches on code AND cdesc -- confirm this is intended.
    select  b.code, b.cdesc
    from all_dx b
    join dx_resolved r on (b.cdesc = r.cdesc and b.tposted::date > r.tposted::date )
    where (b.resolved is null or b.resolved = false)  

)
, chronic_dx AS (           -- all problems flagged chronic
    select b.code, b.cdesc
    from all_dx b
    where b.chronic = true
)

-- all diagnoses at time of encounter
select  a.code, 
    a.cdesc
from dx_at_encounter a

-- UNION (not UNION ALL) is deliberate: a diagnosis may qualify in several
-- branches but must be returned only once.
union 
-- all recurrent problems
select 
    a.code, 
    a.cdesc
from recurrent a

union

-- all problems that have never been resolved
select 
    a.code, 
    a.cdesc
from never_resolved a

union

-- all chronic problems
select 
    a.code, 
    a.cdesc
from chronic_dx a

-- LOOP runs to END LOOP once for each result record of the unions.
LOOP
    r.tposted = (       -- most recent usage of the diagnosis
            select max(b.tposted)
            from all_dx b
            where r.cicd9 = b.code and r.cdesc = b.cdesc);

    -- treated / resolved are taken from the record at that most recent tposted.
    -- NOTE(review): if two all_dx rows share code, cdesc and tposted, these scalar
    -- subqueries raise "more than one row returned" -- confirm that cannot happen.
    r.treated = (   
            select b.treated from all_dx b
            where b.tposted = r.tposted and b.code = r.cicd9 and b.cdesc = r.cdesc);

    r.resolved =  (     
            select b.resolved from all_dx b
                where b.tposted = r.tposted and b.code = r.cicd9 and b.cdesc = r.cdesc);

    -- true if any record of this diagnosis has pmh set, otherwise NULL
    r.pmh = (
            select distinct true 
            from all_dx b
            where
            b.pmh = true and 
            b.code = r.cicd9 and 
            b.cdesc = r.cdesc ); 

    -- true if any record of this diagnosis is chronic, otherwise NULL
    r.chronic = (
            select distinct true 
            from all_dx b
            where
            b.chronic = true and 
            b.code = r.cicd9 and 
            b.cdesc = r.cdesc); 

     RETURN NEXT r; -- append the completed row to the result set

END LOOP;

RETURN;

END

$BODY$
  LANGUAGE plpgsql VOLATILE
  COST 100
  ROWS 1000;
ALTER FUNCTION test2(integer, timestamp without time zone)
  OWNER TO postgres;