查询执行时间太长,成本高得难以置信

Query taking too long to execute and a unbelievable cost

我目前正在编写一个查询,我 运行 解释以检查 运行 它所花费的查询成本和时间。我运行解释后发现成本太高了。请找到以下查询。

create table l1.dm_sc_temp_pl_activity_p as 
select rel,licamt,tcamt, spsc, pdn, pmn4, pcs, clsldt, pci, cpdt, orgtin, bt, pcpldt, slc, sps, pm3, "ln", idt, ddt, rps, df, dat, pz, rpn, pih, pipid, pcpn, psa2, ingdt, incid, uqt, 
acoid||orgid  as orgid, pn, fn, pc, pm5, pm4, rpnpi, pfdt, sfn, catp, pdcs, pm1, st, pl, cltc, cpat, incn, gn, adjsn, clsfdt, acoid, sstp, orgn, sln, sid, drg, rcdt, tel3, pm2, rpsc, pmn3,
acoid||orgid||orgrid as orgrid, alat, inpn, tel2, drgn, pcpidt, acon, pcpid, id, pcpslid, pd, dob, psa1, orgrn, pcpslidt, pcy, pcpsln, pmn2, pmn5, cid, inpid, adjs, pmn1, eldt, empi, pcpfdt, cltp, tel1, pldt, pstn, spn, spnpi, mn, clid, 
efdt, capdt,ssa1,ssa2,sci,sstn,scy,sz,ftsa1,ftstn,ftz,ftci,ad1,ad2,ad3,ad4,ad5,
ad6,ad7,ad8,ad9,ad10,ad11,ad12,ad13,ad14,ad15,ad16,ad17,ad18,ad19,ad20,ad21,ad22,ad23,ad24,ad25,ad26,ad27,ad28,ad29,
adn1,adn2,adn3,adn4,adn5,adn6,adn7,adn8,adn9,adn10,adn11,adn12,adn13,adn14,
adn15,adn16,adn17,adn18,adn19,adn20,adn21,adn22,adn23,adn24,adn25,adn26,adn27,adn28,adn29 
from (
select  
 c.prov_addr1 as ftsa1,
c.prov_state as ftstn,
c.prov_zip as ftz,
c.prov_city as ftci,
c.iscap as rel,
cast(c.claimsvc_billedamt as decimal(18,2)) as licamt,
cast(c.claim_billedamt as decimal(18,2))  as tcamt,
trim(m.id) as id ,trim(m.tel1) as tel1,trim(m.tel2) as tel2,trim(m.tel3)as tel3,
'' as empi,trim(m.gn) as gn,psa1,psa2,pci,pcpldt,pl,trim(mn) as mn,trim(pz) as pz,trim(ln) as ln,
cast(m.dob as date) as dob,'N' as df,m.pcy,trim(m.fn) as fn,m.pcpfdt,m.pstn,
ssa1,ssa2,sci,sstn,scy,trim(sz) as sz,
--dm_prov
case when (trim(p.npi) in ('NULL','NA','')) then '' else trim(p.npi) end as pcpid,
case when (trim(p.npi) in ('NULL','NA','')) then '' else 'NPI' end as pcpidt,
p.lastname||','||p.firstname||' '||p.middlename as pcpn,
-- dm_vend
case when (trim(v.taxid) in ('NULL','NA','')) then '' else trim(v.taxid) end as pcpslid,
v.vendname as pcpsln,
case when (trim(v.taxid) in ('NULL','NA','')) then '' else 'NPI'  end as pcpslidt,
-- level
case when l."HPName"='Aetna Health of California' then '13'
when l."HPName"='Health Net' then '14'
when l."HPName"='Blue Shield of California' then '12'
when l."HPName"='United Healthcare of California' then '15'
when l."HPName"='Cigna for Health Care Professionals' then '16'
when l."HPName"='Anthem Blue Cross' then '17' end as incid,
l."HPName" as incn,
Case
when l."Progdescr"='DHMNSC United Healthcare Commercial' then '30'
when l."Progdescr"='DHMNSC Blue Shield 65 Plus' then '29'
when l."Progdescr"='DHMNSC United Healthcare Canopy Commercial' then '31'
when l."Progdescr"='DHMNSC United Healthcare Medicare Solutions' then '32'
when l."Progdescr"='DHMNSC Blue Shield Commercial' then '28'
when l."Progdescr"  in ('DHMNSC Health Net SmartCare Commercial','DHMNSC Health Net Canopy UC Blue and Gold Commercial',
'DHMNSC Health Net Canopy Care Commercial') then '33'
when l."Progdescr"='DHMNSC Aetna Commercial' then '34'
when l."Progdescr"='DHMNSC Cigna Commercial' then '35'
when l."Progdescr"='DHMNSC Health Net Commercial' then '36'
when l."Progdescr"='DHMNSC Anthem Blue Cross Commercial' then '37'
when l."Progdescr"='DHMNSC Health Net Seniority Plus' then '38'
when l."Progdescr"='DHMNSC United Healthcare Canopy Medicare Advantage' then '50' end as inpid,
case when l."Progdescr"  in ('DHMNSC Health Net SmartCare Commercial','DHMNSC Health Net Canopy UC Blue and Gold Commercial',
'DHMNSC Health Net Canopy Care Commercial') then 'DHMNSC Health Net SmartCare Commercial'
else l."Progdescr"  end as inpn,
'3' as acoid,
'DHMN-SC' as acon,
case when l."LOB" ='M' then '6'
    when l."LOB" ='C' then '7' else '8' end as orgid,
case when l."LOB"='C' then 'Commercial'when l."LOB"='M' then 'Medicare' else "LOB" end as orgn,
case when  l."HPCode" not in ('PGM000295','PGM000294','PGM000333','PGM000332','PGM000326') then '7'  
     when l."HPCode" in ('PGM000295','PGM000294','PGM000333','PGM000332','PGM000326') then '9' else '8' end as orgrid,
case when  l."HPCode" not in ('PGM000295','PGM000294','PGM000333','PGM000332','PGM000326') then 'Non Canopy'  
     when l."HPCode" in ('PGM000295','PGM000294','PGM000333','PGM000332','PGM000326') then 'Canopy' else 'Other MedGroup' end as orgrn,
-- hardcode columns
'3a1bcd23-d88c-4c70-8ffa-267673232647' as pipid,
'Claims' as st,
l."HPName" as sstp,
'DataMart.bak' as sfn,
current_date as rcdt,
current_date as ingdt,
'claimline' as cltp,
'5' as sid,
'Patient ID' as idt,
 -- dm_claim
trim(c.claimno) as cid,
trim(c.seq) as clid,
'Professional' as cltc,
case when (trim(c.billtype) in ('','NULL','NA') or trim(c.billtype) is null)
    then '' else trim(c.billtype) end as bt,
case when (trim(c.claim_fromdate) in ('','NULL','NA'))then null 
    else cast(c.claim_fromdate as date) end as efdt,
case when (trim(c.claim_todate) in ('','NULL','NA'))then null 
    else cast(c.claim_todate as date) end as eldt,
case when (trim(c.claim_recddate) in ('','NULL','NA'))then null 
    else cast(c.claim_recddate as date) end as cpdt,
case when (trim(c.claim_paiddate) in ('','NULL','NA'))then null 
    else cast(c.claim_paiddate as date) end as capdt,
case when (c.claim_netamt::decimal(18,2)='0' and c.iscap='1') then cast(c.claim_contramt as decimal(18,2)) 
else cast(c.claim_netamt as decimal(18,2))  end as pih,
case when trim(c.claimno)  like '%A1' then 'A'
     when trim(c.claimno)  like '%A2' then 'A'
     when trim(c.claimno)  like '%A3' then 'A'
    when trim(c.claimno)  like '%A4' then 'A'
     when trim(c.claimno)  like '%R1' then 'C' 
     when trim(c.claimno)  like '%R2' then 'C' 
     when trim(c.claimno)  like '%R3' then 'C' 
     when trim(c.claimno)  like '%R4' then 'C' 
     when trim(c.claimno)  like '%R5' then 'C' else 'O' end as adjs,
case when trim(c.claimno)  like '%A1' then 'Adjusted'
     when trim(c.claimno)  like '%A2' then 'Adjusted'
     when trim(c.claimno)  like '%A3' then 'Adjusted'
    when trim(c.claimno)  like '%A4' then 'Adjusted'
     when trim(c.claimno)  like '%R1' then 'Cancelled' 
     when trim(c.claimno)  like '%R2' then 'Cancelled' 
     when trim(c.claimno)  like '%R3' then 'Cancelled' 
     when trim(c.claimno)  like '%R4' then 'Cancelled' 
     when trim(c.claimno)  like '%R5' then 'Cancelled' else 'Original' end as adjsn,
case when (trim(c.taxid) in ('','NULL','NA') or trim(c.taxid) is null)
    then '' else trim(c.taxid) end as orgtin, 
case when (trim(c.npi ) in ('','NULL','NA') or trim(c.npi) is null)
    then '' else trim(c.npi) end as spnpi,
case when (c.prov_name in ('','NULL','NA') or c.prov_name is null) then p1.name else c.prov_name end as spn,
p1.ptxcd as spsc,
p1.psn as sps,
case when (trim(c.drgcode ) in ('','NULL','NA') or trim(c.drgcode) is null)
    then '' else trim(c.drgcode) end as drg,
c.drg_descr as drgn,
case when (trim(c.claimsvc_fromdate) in ('','NULL','NA'))then null 
    else cast(c.claimsvc_fromdate as date) end as clsfdt,
case when (trim(c.claimsvc_todate) in ('','NULL','NA'))then null 
    else cast(c.claimsvc_todate as date) end  as clsldt,
case when (trim(c.claimsvc_qty ) in ('','NULL','NA') or trim(c.claimsvc_qty) is null)
    then '' else trim(c.claimsvc_qty) end as uqt,
--cast(c.claimsvc_netamt as decimal(18,2)) as catp,
case when c.iscap='1' then cast(c.claimsvc_contramt as decimal(18,2)) else cast(c.claimsvc_netamt as decimal(18,2)) end as catp,
cast(c.claimsvc_allowedamt as decimal(18,2)) as alat,
cast(c.claimsvc_copayamt as decimal(18,2)) as cpat,
cast(c.claimsvc_deductamt as decimal(18,2)) as dat,
case when (trim(c.svccode ) in ('','NULL','NA') or trim(c.svccode) is null)
    then '' else trim(c.svccode) end as pc,
case when (trim(c.svc_longdescr ) in ('','NULL','NA') or trim(c.svc_longdescr) is null)
    then '' else trim(c.svc_longdescr) end as pn,
    ont.csys as pcs,
case when (trim(c.claimsvc_fromdate) in ('','NULL','NA'))then null 
    else cast(c.claimsvc_fromdate as date) end as pfdt,                      
case when (trim(c.claimsvc_todate) in ('','NULL','NA'))then null 
    else cast(c.claimsvc_todate as date) end  as pldt,                       
mf.pm1 ,
mf.pm2 ,
mf.pm3,
mf.pm4,
mf.pm5 ,
mf.pmn1,
mf.pmn2 ,
mf.pmn3,
mf.pmn4,
mf.pmn5,
case when c.poscode in('01','1') then 'Pharmacy'
when c.poscode in ('02','2') then 'Telehealth'
when c.poscode in('03','3') then 'School'
when c.poscode in ('04','4') then 'Homeless Shelter'
when c.poscode in ('05','5') then 'Indian Health Service Free-standing Facility'
when c.poscode='06' then 'Indian Health Service Provider-based Facility'
when c.poscode in ('07','7') then 'Tribal 638 Free-standing Facility'
when c.poscode in ('08','8') then 'Tribal 638 Provider-based Facility'
when c.poscode in('09','9') then 'Prison/Correctional Facility'
when c.poscode in ('10','77') then 'Unassigned'
when c.poscode='11' then 'Office'
when c.poscode='12' then 'Home'
when c.poscode='13' then 'Assisted Living Facility'
when c.poscode='14' then 'Group Home'
when c.poscode='15' then 'Mobile Unit'
when c.poscode='16' then 'Temporary Lodging'
when c.poscode='17' then 'Walk-in Retail Health Clinic'
when c.poscode='18' then 'Place of Employment-Worksite'
when c.poscode='19' then 'Off Campus-Outpatient Hospital'
when c.poscode='20' then 'Urgent Care facility'
when c.poscode='21' then 'Inpatient Hospital'
when c.poscode='22' then 'On Campus-Outpatient Hospital'
when c.poscode='23' then 'Emergency Room – Hospital'
when c.poscode in ('24','B') then 'Ambulatory Surgical Center'
when c.poscode='25' then 'Birthing Center'
when c.poscode='26' then 'Military Treatment Facility'
when c.poscode='31' then 'Skilled Nursing Facility'
when c.poscode='32' then 'Nursing Facility'
when c.poscode='33' then 'Custodial Care Facility'
when c.poscode='34' then 'Hospice'
when c.poscode='41' then 'Ambulance - Land'
when c.poscode='42' then 'Ambulance – Air or Water'
when c.poscode='49' then 'Independent Clinic'
when c.poscode='50' then 'Federally Qualified Health Center'
when c.poscode='51' then 'Inpatient Psychiatric Facility'
when c.poscode='52' then 'Psychiatric Facility-Partial Hospitalization'
when c.poscode='53' then 'Community Mental Health Center'
when c.poscode='54' then 'Intermediate Care Facility/ Individuals with Intellectual Disabilities'
when c.poscode='55' then 'Residential Substance Abuse Treatment Facility'
when c.poscode='56' then 'Psychiatric Residential Treatment Center'
when c.poscode='57' then 'Non-residential Substance Abuse Treatment Facility'
when c.poscode='60' then 'Mass Immunization Center'
when c.poscode='61' then 'Comprehensive Inpatient Rehabilitation Facility'
when c.poscode='62' then 'Comprehensive Outpatient Rehabilitation Facility'
when c.poscode='65' then 'End-Stage Renal Disease Treatment Facility'
when c.poscode='71' then 'Public Health Clinic'
when c.poscode='72' then 'Rural Health Clinic'
when c.poscode in ('81','A') then 'Independent Laboratory'
when c.poscode in ('99','?','O') then 'Other Place of Service' end as sln,
case when c.poscode in ('O','?') then '99'
when c.poscode='A' then '81'
when c.poscode='B' then '24'
when c.poscode in('1','2','3','4','5','6','7','8','9') then '0'||c.poscode else c.poscode end slc,
case when (trim(c.pd ) in ('','NULL','NA') or trim(c.pd ) is null)
    then '' else trim(c.pd) end as pd,
case when (trim(c.pdn ) in ('','NULL','NA') or trim(c.pdn) is null)
    then '' else trim(c.pdn) end as pdn,
pdcs as pdcs,   
case when (trim(c.pd ) in ('','NULL','NA') or trim(c.pd ) is null)
    then Null else (
    case when (trim(c.claimsvc_fromdate) in ('','NULL','NA'))then null 
    else cast(c.claimsvc_fromdate as date) end )end as ddt,
case when (trim(c.ref_npi ) in ('','NULL','NA') or trim(c.ref_npi) is null)
    then '' else trim(c.ref_npi) end as rpnpi,
    p2."name" as rpn,
p2.ptxcd as rpsc,
p2.psn as rps,
ad1,ad2,ad3,ad4,ad5,
ad6,ad7,ad8,ad9,ad10,ad11,ad12,ad13,ad14,ad15,ad16,ad17,ad18,ad19,ad20,ad21,ad22,ad23,ad24,ad25,ad26,ad27,ad28,ad29,
adn1,adn2,adn3,adn4,adn5,adn6,adn7,adn8,adn9,adn10,adn11,adn12,adn13,adn14,
adn15,adn16,adn17,adn18,adn19,adn20,adn21,adn22,adn23,adn24,adn25,adn26,adn27,adn28,adn29
from (select * from l1.dm_proc_memb 
where hpcode in ('PGM000333','PGM000332','PGM000326','PGM000268', 'PGM000269', 'PGM000270', 'PGM000272', 'PGM000273', 'PGM000274', 'PGM000275', 'PGM000276', 'PGM000294', 'PGM000295')) m 
inner join l1.dm_proc_claim_p c 
on m.memb_id=c.claim_memb
left join l1.dm_proc_modif mf 
on mf.claimsvc_id=c.claimsvc_id 
left join l1."dm_prov" p on p.prov_id =m.prov_id
left join l1."dm_vend" v on v.vend_id =m.vend_id
left join (select * from l1."level" where "HPCode" in ('PGM000333','PGM000332','PGM000326','PGM000268', 'PGM000269', 'PGM000270', 'PGM000272', 'PGM000273', 'PGM000274', 'PGM000275', 'PGM000276', 'PGM000294', 'PGM000295')) l
on trim(l."HPCode" )=trim(m.hpcode)
left join l2.pd_npi p1 on trim(c.npi)=trim(p1.npi)
left join (select * from l2.pd_ontology where dtp ='procedure' )ont
on trim(ont.cval) =trim(c.svccode )
--left join l2.pd_ontology ontd
--on trim(ontd.cval) =trim(c.pd) and ontd.dtp ='diagnosis'
left join l2.pd_npi p2 on trim(c.ref_npi )=trim(p2.npi)
)f

我 运行 解释了这个查询的命令,我观察到以下操作:

  Merge Cond: ((btrim((p2.npi)::text)) = (btrim((c.ref_npi)::text)))
  ->  Sort  (cost=1761851.74..1785989.13 rows=9654958 width=108)
        Sort Key: (btrim((p2.npi)::text))
        ->  Seq Scan on pd_npi p2  (cost=0.00..641739.58 rows=9654958 width=108)
  ->  Materialize  (cost=159139479153552696279040.00..159226719441637676679168.00 rows=17448057616994486190080 width=3383)
        ->  Sort  (cost=159139479153552696279040.00..159183099297595186479104.00 rows=17448057616994486190080 width=3383)
              Sort Key: (btrim((c.ref_npi)::text))
              ->  Merge Join  (cost=456947875487.57..261731712177051860992.00 rows=17448057616994486190080 width=3383)
                    Merge Cond: ((c.claim_memb)::text = (dm_proc_memb.memb_id)::text)
                    ->  Nested Loop Left Join  (cost=157761.50..10839427546273626.00 rows=2707460197662070 width=2450)
                          Join Filter: (btrim((c.npi)::text) = btrim((p1.npi)::text))
                          ->  Nested Loop Left Join  (cost=157761.50..9586754880584.63 rows=56084349568 width=2353)
                                Join Filter: (btrim((pd_ontology.cval)::text) = btrim((c.svccode)::text))
                                ->  Nested Loop Left Join  (cost=0.56..9362416722695.26 rows=43719408 width=2348)
                                      Join Filter: ((mf.claimsvc_id)::text = (c.claimsvc_id)::text)
                                      ->  Index Scan Backward using l1_dm_proc_claims_p_id_idx on dm_proc_claim_p c  (cost=0.56..17080655.54 rows=43719408 width=2036)
                                      ->  Materialize  (cost=0.00..376215.44 rows=14276496 width=352)
                                            ->  Seq Scan on dm_proc_modif mf  (cost=0.00..304832.96 rows=14276496 width=352)
                                ->  Materialize  (cost=157760.93..760260.39 rows=256565 width=14)
                                      ->  Bitmap Heap Scan on pd_ontology  (cost=157760.93..758977.56 rows=256565 width=14)
                                            Recheck Cond: ((dtp)::text = 'procedure'::text)
                                            ->  Bitmap Index Scan on pd_ont_idx_dtp  (cost=0.00..157696.79 rows=256565 width=0)
                                                  Index Cond: ((dtp)::text = 'procedure'::text)
                          ->  Materialize  (cost=0.00..690014.37 rows=9654958 width=108)
                                ->  Seq Scan on pd_npi p1  (cost=0.00..641739.58 rows=9654958 width=108)
                    ->  Materialize  (cost=456947717726.07..459097227591.84 rows=429901973154 width=972)
                          ->  Sort  (cost=456947717726.07..458022472658.96 rows=429901973154 width=972)
                                Sort Key: dm_proc_memb.memb_id NULLS FIRST
                                ->  Merge Right Join  (cost=82668031.11..6531565545.79 rows=429901973154 width=972)
                                      Merge Cond: ((v.vend_id)::text = (dm_proc_memb.vend_id)::text)
                                      ->  Sort  (cost=88160.95..89633.33 rows=588951 width=96)
                                            Sort Key: v.vend_id
                                            ->  Seq Scan on dm_vend v  (cost=0.00..31716.51 rows=588951 width=96)
                                      ->  Materialize  (cost=82579870.16..83309815.39 rows=145989046 width=940)
                                            ->  Sort  (cost=82579870.16..82944842.77 rows=145989046 width=940)
                                                  Sort Key: dm_proc_memb.vend_id
                                                  ->  Merge Right Join  (cost=165139.84..2405511.13 rows=145989046 width=940)
                                                        Merge Cond: ((p.prov_id)::text = (dm_proc_memb.prov_id)::text)
                                                        ->  Index Scan using dm_prov_prov_id on dm_prov p  (cost=0.42..49937.79 rows=239291 width=160)
                                                        ->  Sort  (cost=165139.42..165444.47 rows=122018 width=844)
                                                              Sort Key: dm_proc_memb.prov_id
                                                              ->  Hash Left Join  (cost=485.00..154830.89 rows=122018 width=844)
                                                                    Hash Cond: (btrim((dm_proc_memb.hpcode)::text) = btrim((level."HPCode")::text))
                                                                    ->  Bitmap Heap Scan on dm_proc_memb  (cost=481.71..153985.68 rows=122018 width=812)
                                                                          Recheck Cond: ((hpcode)::text = ANY ('{PGM000333,PGM000332,PGM000326,PGM000268,PGM000269,PGM000270,PGM000272,PGM000273,PGM000274,PGM000275,PGM000276,PGM000294,PGM000295}'::text[]))
                                                                          ->  Bitmap Index Scan on l1_proc_memb_id_idx  (cost=0.00..451.20 rows=122018 width=0)
                                                                    ->  Hash  (cost=3.13..3.13 rows=13 width=64)
                                                                          ->  Seq Scan on level  (cost=0.00..3.13 rows=13 width=64)
                                                                                Filter: (("HPCode")::text = ANY ('{PGM000333,PGM000332,PGM000326,PGM000268,PGM000269,PGM000270,PGM000272,PGM000273,PGM000274,PGM000275,PGM000276,PGM000294,PGM000295}'::text[]))

查询花费的成本太大了。我怎样才能降低成本并节省时间。 P.S 我分析发现左连接增加了成本。请告诉我如何管理它。

表的最大大小为 32 GB。

谢谢!!

这是一个很大的查询,对于 Whosebug 志愿者来说可能太大了,无法解决问题。不过,我确实注意到一件事:您多次出现

left join t on trim(t.col) = trim(a.col)   /* slow! */

这是一个 query-performance 反模式。如果您想要良好的性能,您应该竭尽全力避免在 WHERE 和 ON 子句中对列值调用函数。您希望这些子句中的所有列都是 sargable。不过,您可以在常量值上调用函数。

您的 objective 应该将 LEFT JOIN 读作:

left join t on t.col = a.col

并且你应该确定ON子句的两侧具有完全相同的数据类型;这使得数据库服务器更容易有效地进行连接。

您的怪物查询似乎有一个表示层,该表示层带有一个长 SELECT 子句,其中包含许多复杂的列定义。然后,在它的最后 15 行左右,它有一个以 (select.... 开头的 data-retrieval 子查询。如果您正在尝试优化查询,明智的做法是专注于 data-retrieval 子查询,剥离表示层。

使 ON 和 WHERE 子句可搜索后,您可以继续查看表的索引。

除了 O.Jones 指出的内容之外,所有那些你确实有 TRIM() 调用的实例,你应该存储你的数据 pre-trimmed 以防止调用 trim到处都是。

另一个使事情变得更容易的项目是进行一些查找 table,如果它们不存在的话。您拥有所有这些 CASE/WHEN 上下文检查,例如 HPName、Progdesc 和 HPCode 值。您应该能够在查找 table 中找到它们并加入它们以获得给定的 ID。更典型的是,最好对 ID 和 Name/Description 进行一次查找 table 一次,这样您就不会重复所有具有文字值 'Aetna Health of California' 或 [=17= 的记录].您的 child-level table 的提供者 ID 应为 12、13、14 等,而不是长名称。这就是事情应该进行的地方。

祝你好运。