基于过滤器值的查询的 Postgres 性能问题
Postgres performance issue with query based on filter values
我不是 Postgres 方面的专家,但我正在尝试理解这种奇怪的行为,也许你们中的一些人可以给我一些见解。
涉及到的表和索引就这些
表
-- submissions: each row points at one applications row (application_id,
-- removed together with it via ON DELETE CASCADE) and one transaction_names row.
CREATE TABLE swp_am_hcbe_pro.submissions
(
id bigint NOT NULL DEFAULT nextval('swp_am_hcbe_pro.submissions_id_seq'::regclass),
application_id bigint NOT NULL,
transaction_names_id bigint NOT NULL,
-- ordering column: the application_list_simple view keeps, per application_id,
-- the row with the highest "timestamp"
"timestamp" timestamp without time zone NOT NULL,
submission_status character varying(32) COLLATE pg_catalog."default" NOT NULL,
submission_type character varying(16) COLLATE pg_catalog."default" NOT NULL,
exit_code character varying(32) COLLATE pg_catalog."default",
ignore_partner_status boolean NOT NULL DEFAULT false,
ignore_sell_partner_status boolean NOT NULL DEFAULT false,
ignore_exclusion_rules boolean NOT NULL DEFAULT false,
dpa_iban character varying(32) COLLATE pg_catalog."default",
dpa_bic character varying(32) COLLATE pg_catalog."default",
dpa_id bigint,
dpa_blz bigint,
dda_iban character varying(32) COLLATE pg_catalog."default",
dda_bic character varying(32) COLLATE pg_catalog."default",
dda_id bigint,
dda_blz bigint,
dda_sepa_mandate_ref character varying(128) COLLATE pg_catalog."default",
use_different_sepa_mandate character varying(34) COLLATE pg_catalog."default",
use_manual_limit_extension boolean NOT NULL DEFAULT false,
use_automatic_limit_extension boolean NOT NULL DEFAULT false,
json_payload text COLLATE pg_catalog."default" NOT NULL,
final_timestamp timestamp without time zone,
CONSTRAINT submissions_pkey PRIMARY KEY (id),
CONSTRAINT submission_app_id FOREIGN KEY (application_id)
REFERENCES swp_am_hcbe_pro.applications (id) MATCH SIMPLE
ON UPDATE NO ACTION
ON DELETE CASCADE,
CONSTRAINT submission_transaction_names_id FOREIGN KEY (transaction_names_id)
REFERENCES swp_am_hcbe_pro.transaction_names (id) MATCH SIMPLE
ON UPDATE NO ACTION
ON DELETE NO ACTION,
-- allowed values for submission_status
CONSTRAINT chk_submission_status CHECK (submission_status::text = ANY (ARRAY['ERROR'::character varying, 'DENIED'::character varying, 'PROCESSED'::character varying, 'REJECTED'::character varying, 'PROCESSING'::character varying, 'SCHEDULED'::character varying]::text[])),
-- allowed values for submission_type
CONSTRAINT submission_types CHECK (submission_type::text = ANY (ARRAY['AUTO'::character varying, 'MANUAL'::character varying]::text[]))
)
WITH (
OIDS = FALSE
)
TABLESPACE pg_default;
-- applications: parent of submissions (referenced by submission_app_id).
-- The queries in this post filter on incoming_timestamp and sort on
-- approve_timestamp; none of the indexes listed for this table covers
-- either column.
CREATE TABLE swp_am_hcbe_pro.applications
(
id bigint NOT NULL DEFAULT nextval('swp_am_hcbe_pro.applications_id_seq'::regclass),
correlation_id character varying(64) COLLATE pg_catalog."default" NOT NULL,
incoming_timestamp timestamp without time zone NOT NULL,
source_input character varying(16) COLLATE pg_catalog."default" NOT NULL,
source_file_path character varying(255) COLLATE pg_catalog."default",
application_type character varying(127) COLLATE pg_catalog."default" NOT NULL,
loan_id bigint,
vin character varying(17) COLLATE pg_catalog."default",
cooperation_name character varying(255) COLLATE pg_catalog."default",
cooperation_id bigint,
submitter_name character varying(255) COLLATE pg_catalog."default",
submitter_id bigint,
dealer_name character varying(255) COLLATE pg_catalog."default",
dealer_id bigint,
dealer_ext_id character varying(25) COLLATE pg_catalog."default",
invoice_id character varying(25) COLLATE pg_catalog."default",
stock_id character varying(20) COLLATE pg_catalog."default",
payment_term character varying(20) COLLATE pg_catalog."default",
reg_document_id character varying(25) COLLATE pg_catalog."default",
invoice_amount numeric(20,4),
application_status character varying(64) COLLATE pg_catalog."default",
dealer_group_id bigint,
approver text COLLATE pg_catalog."default",
approve_timestamp timestamp without time zone,
payload text COLLATE pg_catalog."default" NOT NULL,
auto_resub_attempts integer NOT NULL DEFAULT 0,
row_number bigint,
email_sent boolean DEFAULT false,
modified_date timestamp(6) without time zone DEFAULT CURRENT_TIMESTAMP,
product_name text COLLATE pg_catalog."default",
priority smallint,
CONSTRAINT applications_pkey PRIMARY KEY (id),
-- allowed values for application_status (the column itself is nullable)
CONSTRAINT chk_application_status CHECK (application_status::text = ANY (ARRAY['PROCESSED'::character varying, 'PROCESSING'::character varying, 'WAIT_NEXT_SUBMISSION'::character varying, 'WAIT_MANUAL_SUBMISSION'::character varying, 'WAIT_AUTOMATIC_SUBMISSION'::character varying, 'WAIT_IN_QUEUE'::character varying, 'SUBMISSION_NOT_FOUND'::character varying, 'WAIT_FOR_ASYNC_ACTIVATION'::character varying, 'WAIT_FOR_ASYNC_SHIPMENT'::character varying, 'WAIT_FOR_BOOKING_CONFIRMATION'::character varying, 'WAIT_FOR_ACTIVATION_CONFIRMATION'::character varying, 'REJECTED'::character varying, 'NOT_IN_QUEUE'::character varying, 'SCHEDULED'::character varying]::text[])),
-- allowed values for source_input
CONSTRAINT chk_source CHECK (source_input::text = ANY (ARRAY['LM'::character varying, 'KOSYFA'::character varying, 'SWPII'::character varying, 'ADM'::character varying]::text[]))
)
WITH (
OIDS = FALSE
)
TABLESPACE pg_default;
-- transaction_names: referenced by submissions.transaction_names_id; the
-- application_list_simple view exposes name, sub_name and id from it.
-- The plans in this post report 29 rows for this table.
CREATE TABLE swp_am_hcbe_pro.transaction_names
(
id bigint NOT NULL DEFAULT nextval('swp_am_hcbe_pro.transaction_names_id_seq'::regclass),
name character varying(32) COLLATE pg_catalog."default" NOT NULL,
sub_name character varying(32) COLLATE pg_catalog."default",
CONSTRAINT transaction_names_pkey PRIMARY KEY (id)
)
WITH (
OIDS = FALSE
)
TABLESPACE pg_default;
索引
-- Matches the view's ORDER BY (application_id, "timestamp" DESC); the plans in
-- this post scan this index to compute the subm CTE.
CREATE INDEX submissions_app_id_asc_timestamp_desc_idx
ON swp_am_hcbe_pro.submissions USING btree
(application_id, "timestamp" DESC)
TABLESPACE pg_default;
-- NOTE(review): shares the leading column application_id with the index above;
-- check whether both are needed.
CREATE INDEX submissions_app_id_timestamp_trans_name_id_idx
ON swp_am_hcbe_pro.submissions USING btree
(application_id, "timestamp", transaction_names_id)
TABLESPACE pg_default;
CREATE INDEX submissions_timestamp_asc_app_id_asc_idx
ON swp_am_hcbe_pro.submissions USING btree
("timestamp", application_id)
TABLESPACE pg_default;
-- NOTE(review): correlation_id is also the leading column of
-- application_correlation_row_number_idx below; possibly redundant — confirm
-- against the workload.
CREATE INDEX application_correlation_id_idx
ON swp_am_hcbe_pro.applications USING btree
(correlation_id COLLATE pg_catalog."default")
TABLESPACE pg_default;
CREATE INDEX application_correlation_row_number_idx
ON swp_am_hcbe_pro.applications USING btree
(correlation_id COLLATE pg_catalog."default", row_number)
TABLESPACE pg_default;
CREATE INDEX applications_application_status_idx
ON swp_am_hcbe_pro.applications USING btree
(application_status COLLATE pg_catalog."default")
TABLESPACE pg_default;
CREATE INDEX applications_invoice_idx
ON swp_am_hcbe_pro.applications USING btree
(invoice_id COLLATE pg_catalog."default")
TABLESPACE pg_default;
CREATE INDEX applications_vin_idx
ON swp_am_hcbe_pro.applications USING btree
(vin COLLATE pg_catalog."default")
TABLESPACE pg_default;
-- NOTE: no index in this list covers applications.incoming_timestamp; the plans
-- in this post evaluate the date filter with a Parallel Seq Scan on applications.
我有以下视图:
-- application_list_simple: every application together with its most recent
-- submission (if any) and that submission's transaction name.
CREATE OR REPLACE VIEW swp_am_hcbe_pro.application_list_simple AS
-- subm: one row per application_id. DISTINCT ON keeps the first row of each
-- application_id group under the ORDER BY, i.e. the one with the highest
-- "timestamp".
-- NOTE: the plans in this post show this CTE computed in full (Unique over
-- ~1.7M index rows) and then read through a "CTE Scan", whatever the outer
-- date filter is.
WITH subm AS (
SELECT DISTINCT ON (s.application_id) s.application_id,
s."timestamp",
s.exit_code,
s.transaction_names_id
FROM swp_am_hcbe_pro.submissions s
ORDER BY s.application_id, s."timestamp" DESC
)
SELECT app.id,
app.correlation_id,
app.source_input,
app.source_file_path,
app.application_type,
app.loan_id,
app.vin,
app.cooperation_name,
app.cooperation_id,
app.submitter_name,
app.submitter_id,
app.dealer_id,
app.dealer_name,
app.dealer_ext_id,
app.invoice_id,
app.stock_id,
app.payment_term,
app.reg_document_id,
app.invoice_amount,
app.application_status,
app.incoming_timestamp,
app.dealer_group_id,
app.approver,
app.approve_timestamp,
subm.exit_code,
tn.name AS transaction_name,
tn.sub_name AS sub_transaction_name,
tn.id AS transaction_type_id,
subm."timestamp" AS last_submission_timestamp,
app.modified_date
FROM swp_am_hcbe_pro.applications app
-- LEFT JOINs keep applications that have no submission; the subm and tn
-- columns are NULL for them.
LEFT JOIN subm ON app.id = subm.application_id
LEFT JOIN swp_am_hcbe_pro.transaction_names tn ON tn.id = subm.transaction_names_id;
如果我运行这个语句,经过的时间是:Execution time: 2481.333 ms
-- Fast case (Execution time: 2481.333 ms in the plan that follows).
-- Both bounds are inclusive, so a row stamped exactly 2021-11-09 00:00:00
-- also matches the 2021-11-09 .. 2021-11-10 window of the slow case.
explain analyze
SELECT *, count(*) OVER () AS total FROM swp_am_hcbe_pro.application_list_simple
WHERE INCOMING_TIMESTAMP >= '2021-11-08' AND INCOMING_TIMESTAMP <= '2021-11-09'
ORDER BY APPROVE_TIMESTAMP DESC, INCOMING_TIMESTAMP DESC LIMIT 100 OFFSET 0
;
我得到了以下内容
"Limit (cost=461799.85..461800.10 rows=100 width=490) (actual time=2473.878..2474.618 rows=100 loops=1)"
" -> Sort (cost=461799.85..461803.13 rows=1311 width=490) (actual time=2473.877..2474.612 rows=100 loops=1)"
" Sort Key: app.approve_timestamp DESC, app.incoming_timestamp DESC"
" Sort Method: top-N heapsort Memory: 112kB"
" -> WindowAgg (cost=458791.38..461749.74 rows=1311 width=490) (actual time=2471.792..2473.247 rows=1620 loops=1)"
" -> Hash Left Join (cost=458791.38..461720.25 rows=1311 width=482) (actual time=2456.132..2470.895 rows=1620 loops=1)"
" Hash Cond: (subm.transaction_names_id = tn.id)"
" CTE subm"
" -> Unique (cost=0.43..333656.64 rows=129297 width=31) (actual time=0.036..1846.992 rows=645062 loops=1)"
" -> Index Scan using submissions_app_id_asc_timestamp_desc_idx on submissions s (cost=0.43..329433.26 rows=1689349 width=31) (actual time=0.033..1621.049 rows=1699582 loops=1)"
" -> Hash Right Join (cost=125133.09..128058.44 rows=1311 width=459) (actual time=2456.083..2470.337 rows=1620 loops=1)"
" Hash Cond: (subm.application_id = app.id)"
" -> CTE Scan on subm (cost=0.00..2585.94 rows=129297 width=106) (actual time=0.038..2135.256 rows=645062 loops=1)"
" -> Hash (cost=125116.71..125116.71 rows=1311 width=361) (actual time=237.582..238.310 rows=1620 loops=1)"
" Buckets: 2048 Batches: 1 Memory Usage: 483kB"
" -> Gather (cost=1000.00..125116.71 rows=1311 width=361) (actual time=11.959..236.468 rows=1620 loops=1)"
" Workers Planned: 2"
" Workers Launched: 2"
" -> Parallel Seq Scan on applications app (cost=0.00..123985.61 rows=546 width=361) (actual time=2.880..97.484 rows=540 loops=3)"
" Filter: ((incoming_timestamp >= '2021-11-08 00:00:00'::timestamp without time zone) AND (incoming_timestamp <= '2021-11-09 00:00:00'::timestamp without time zone))"
" Rows Removed by Filter: 214530"
" -> Hash (cost=1.29..1.29 rows=29 width=31) (actual time=0.033..0.033 rows=29 loops=1)"
" Buckets: 1024 Batches: 1 Memory Usage: 10kB"
" -> Seq Scan on transaction_names tn (cost=0.00..1.29 rows=29 width=31) (actual time=0.011..0.015 rows=29 loops=1)"
"Planning time: 0.587 ms"
"Execution time: 2481.333 ms"
如果我只改变日期过滤条件再运行同一条语句,则需要 Execution time: 365817.271 ms
-- Slow case (Execution time: 365817.271 ms): the same statement with the
-- window moved one day forward. The plan that follows estimates rows=1 for the
-- applications scan (actual 2140) and rescans the subm CTE once per
-- application row (loops=2140).
explain analyze
SELECT *, count(*) OVER () AS total FROM swp_am_hcbe_pro.application_list_simple
WHERE INCOMING_TIMESTAMP >= '2021-11-09' AND INCOMING_TIMESTAMP <= '2021-11-10'
ORDER BY APPROVE_TIMESTAMP DESC, INCOMING_TIMESTAMP DESC LIMIT 100 OFFSET 0
;
"Limit (cost=462844.68..462844.69 rows=1 width=490) (actual time=365809.554..365810.419 rows=100 loops=1)"
" -> Sort (cost=462844.68..462844.69 rows=1 width=490) (actual time=365809.553..365810.411 rows=100 loops=1)"
" Sort Key: app.approve_timestamp DESC, app.incoming_timestamp DESC"
" Sort Method: top-N heapsort Memory: 125kB"
" -> WindowAgg (cost=334656.77..462844.67 rows=1 width=490) (actual time=365806.595..365808.483 rows=2140 loops=1)"
" -> Nested Loop Left Join (cost=334656.77..462844.65 rows=1 width=482) (actual time=2094.856..365793.839 rows=2140 loops=1)"
" CTE subm"
" -> Unique (cost=0.43..333656.64 rows=129297 width=31) (actual time=0.036..1771.818 rows=645068 loops=1)"
" -> Index Scan using submissions_app_id_asc_timestamp_desc_idx on submissions s (cost=0.43..329433.26 rows=1689349 width=31) (actual time=0.034..1563.614 rows=1699595 loops=1)"
" -> Nested Loop Left Join (cost=1000.00..129187.86 rows=1 width=459) (actual time=2094.836..365762.361 rows=2140 loops=1)"
" Join Filter: (app.id = subm.application_id)"
" Rows Removed by Join Filter: 1380443382"
" -> Gather (cost=1000.00..124985.71 rows=1 width=361) (actual time=8.475..33.996 rows=2140 loops=1)"
" Workers Planned: 2"
" Workers Launched: 2"
" -> Parallel Seq Scan on applications app (cost=0.00..123985.61 rows=1 width=361) (actual time=1.809..103.597 rows=713 loops=3)"
" Filter: ((incoming_timestamp >= '2021-11-09 00:00:00'::timestamp without time zone) AND (incoming_timestamp <= '2021-11-10 00:00:00'::timestamp without time zone))"
" Rows Removed by Filter: 214359"
" -> CTE Scan on subm (cost=0.00..2585.94 rows=129297 width=106) (actual time=0.030..125.740 rows=645068 loops=2140)"
" -> Index Scan using transaction_names_pkey on transaction_names tn (cost=0.14..0.16 rows=1 width=31) (actual time=0.009..0.009 rows=1 loops=2140)"
" Index Cond: (id = subm.transaction_names_id)"
"Planning time: 0.414 ms"
"Execution time: 365817.271 ms"
我真的不明白为什么会这样。我还尝试过用跨越多天的日期范围(例如一周、一个月)作为过滤条件来运行查询,它们全都运行正常。
我已经对受影响的表执行了 VACUUM,尽管它们的行数并不多。我还能检查什么?
如果您需要更多信息,请随时问我
更新
如果我将查询改成下面这样,对字符串使用 to_timestamp,那么它就能正常工作。但为什么它在所有其他情况下都有效,而在这种情况下却无效?为什么问题总是出现在当前日期?
-- Workaround case (Execution time: 2751.279 ms): bounds produced by
-- to_timestamp() instead of plain literals, and without count(*) OVER ().
-- The plan that follows estimates rows=517 for applications and uses hash
-- joins again.
explain analyze
SELECT * FROM swp_am_hcbe_pro.application_list_simple
WHERE INCOMING_TIMESTAMP >= to_timestamp('2021-11-09 00:00:00','YYYY-MM-DD HH24:MI:SS')
AND INCOMING_TIMESTAMP <= to_timestamp('2021-11-10 00:00:00','YYYY-MM-DD HH24:MI:SS')
ORDER BY APPROVE_TIMESTAMP DESC, INCOMING_TIMESTAMP DESC LIMIT 100 OFFSET 0 ;
我得到以下
"Limit (cost=463151.72..463151.97 rows=100 width=481) (actual time=2743.036..2743.923 rows=100 loops=1)"
" -> Sort (cost=463151.72..463153.01 rows=517 width=481) (actual time=2743.035..2743.918 rows=100 loops=1)"
" Sort Key: app.approve_timestamp DESC, app.incoming_timestamp DESC"
" Sort Method: top-N heapsort Memory: 121kB"
" -> Hash Left Join (cost=460200.05..463126.79 rows=517 width=481) (actual time=2730.684..2741.744 rows=2382 loops=1)"
" Hash Cond: (subm.transaction_names_id = tn.id)"
" CTE subm"
" -> Unique (cost=0.43..333658.84 rows=129297 width=31) (actual time=0.020..1669.678 rows=645311 loops=1)"
" -> Index Scan using submissions_app_id_asc_timestamp_desc_idx on submissions s (cost=0.43..329435.46 rows=1689349 width=31) (actual time=0.019..1476.827 rows=1700028 loops=1)"
" -> Hash Right Join (cost=126539.56..129464.91 rows=517 width=458) (actual time=2730.642..2740.999 rows=2382 loops=1)"
" Hash Cond: (subm.application_id = app.id)"
" -> CTE Scan on subm (cost=0.00..2585.94 rows=129297 width=106) (actual time=0.023..1924.458 rows=645311 loops=1)"
" -> Hash (cost=126533.10..126533.10 rows=517 width=360) (actual time=736.655..737.534 rows=2382 loops=1)"
" Buckets: 4096 (originally 1024) Batches: 1 (originally 1) Memory Usage: 864kB"
" -> Gather (cost=1000.00..126533.10 rows=517 width=360) (actual time=18.882..734.265 rows=2382 loops=1)"
" Workers Planned: 2"
" Workers Launched: 2"
" -> Parallel Seq Scan on applications app (cost=0.00..125481.40 rows=215 width=360) (actual time=15.908..610.513 rows=794 loops=3)"
" Filter: ((incoming_timestamp >= to_timestamp('2021-11-09 00:00:00'::text, 'YYYY-MM-DD HH24:MI:SS'::text)) AND (incoming_timestamp <= to_timestamp('2021-11-10 00:00:00'::text, 'YYYY-MM-DD HH24:MI:SS'::text)))"
" Rows Removed by Filter: 214359"
" -> Hash (cost=1.29..1.29 rows=29 width=31) (actual time=0.026..0.026 rows=29 loops=1)"
" Buckets: 1024 Batches: 1 Memory Usage: 10kB"
" -> Seq Scan on transaction_names tn (cost=0.00..1.29 rows=29 width=31) (actual time=0.012..0.018 rows=29 loops=1)"
"Planning time: 0.370 ms"
"Execution time: 2751.279 ms"
那么,问题依旧
为什么这个查询需要 360 秒?
-- Literal bounds: the form reported as taking about 360 seconds.
SELECT * FROM swp_am_hcbe_pro.application_list_simple
WHERE INCOMING_TIMESTAMP >= '2021-11-09' AND INCOMING_TIMESTAMP <= '2021-11-10'
ORDER BY APPROVE_TIMESTAMP DESC, INCOMING_TIMESTAMP DESC LIMIT 100 OFFSET 0
;
但是这个需要3秒
-- to_timestamp() bounds: the form reported as taking about 3 seconds.
SELECT * FROM swp_am_hcbe_pro.application_list_simple
WHERE INCOMING_TIMESTAMP >= to_timestamp('2021-11-09 00:00:00','YYYY-MM-DD HH24:MI:SS')
AND INCOMING_TIMESTAMP <= to_timestamp('2021-11-10 00:00:00','YYYY-MM-DD HH24:MI:SS')
ORDER BY APPROVE_TIMESTAMP DESC, INCOMING_TIMESTAMP DESC LIMIT 100 OFFSET 0 ;
在其他情况下,无论我是否使用 to_timestamp,查询都能正常工作。请注意,我在上次更新中删除了 count(*) OVER (),以表明它与问题无关,所以问题仍然存在。
感谢您的支持
尝试使用 (TEMP) 视图而不是 CTE,以避免(无索引可用的)CTE 扫描 [我还用 NOT EXISTS(...) 替换了 DISTINCT ON(...)]:
-- vsubm: the newest submission of each application, written as an anti-join
-- ("no later submission exists for the same application") in a plain view
-- rather than a CTE.
-- The submissions column is named "timestamp"; it is exposed here as
-- ztimestamp so that this view's output columns stay unchanged.
-- NOTE: if several submissions of one application share the newest
-- "timestamp", every one of them is returned, whereas DISTINCT ON
-- (application_id) would keep exactly one.
-- Table names are unqualified: swp_am_hcbe_pro must be on the search_path.
CREATE OR REPLACE VIEW vsubm AS
SELECT
    s.application_id
  , s."timestamp" AS ztimestamp
  , s.exit_code
  , s.transaction_names_id
FROM submissions s
WHERE NOT EXISTS (
    -- a strictly later submission of the same application disqualifies s
    SELECT *
    FROM submissions nx
    WHERE nx.application_id = s.application_id
      AND nx."timestamp" > s."timestamp"
)
;
-- application_list_simple2: same output columns as application_list_simple,
-- but the latest submission is taken from the vsubm view instead of a CTE.
-- Applications without a submission are kept; their submission and
-- transaction-name columns are NULL.
CREATE OR REPLACE VIEW application_list_simple2 AS
SELECT
    a.id,
    a.correlation_id,
    a.source_input,
    a.source_file_path,
    a.application_type,
    a.loan_id,
    a.vin,
    a.cooperation_name,
    a.cooperation_id,
    a.submitter_name,
    a.submitter_id,
    a.dealer_id,
    a.dealer_name,
    a.dealer_ext_id,
    a.invoice_id,
    a.stock_id,
    a.payment_term,
    a.reg_document_id,
    a.invoice_amount,
    a.application_status,
    a.incoming_timestamp   AS incoming_timestamp,
    a.dealer_group_id,
    a.approver,
    a.approve_timestamp    AS approve_timestamp,
    last_sub.exit_code,
    t.name                 AS transaction_name,
    t.sub_name             AS sub_transaction_name,
    t.id                   AS transaction_type_id,
    last_sub.ztimestamp    AS last_submission_timestamp,
    a.modified_date
FROM applications AS a
LEFT JOIN vsubm AS last_sub
    ON last_sub.application_id = a.id
LEFT JOIN transaction_names AS t
    ON t.id = last_sub.transaction_names_id
;
-- Test query against application_list_simple2 for the one-day window starting
-- 2021-11-08. The range is half-open (>= start, < next day), so a row stamped
-- exactly at midnight belongs to one day only.
-- Prefix the statement with EXPLAIN or EXPLAIN ANALYZE to inspect the plan.
-- The question's query additionally selected count(*) OVER () AS total and
-- paged with LIMIT 100 OFFSET 0; both are left out here on purpose.
SELECT *
FROM application_list_simple2
WHERE incoming_timestamp >= '2021-11-08'
  AND incoming_timestamp <  '2021-11-09'
ORDER BY approve_timestamp DESC, incoming_timestamp DESC
;
关于观察到的行为:
- 选择位于时间范围边缘的日期区间,可能会生成与位于中间的时间跨度不同的计划。
- 今天的记录的统计信息可能还不完整(统计信息收集器可能有滞后)。
- 糟糕的计划(大量哈希连接和顺序扫描)可能是由于缺少统计信息、缺少索引,或 random_page_cost 设置得过高造成的。
- 这些表的行相当宽。也许需要做一些规范化,特别是对于 applications 表。
- 混用带时区和不带时区的时间戳可能会造成一些混乱。[一般建议:始终使用带时区的时间戳]
我不是 Postgres 方面的专家,但我正在尝试理解这种奇怪的行为,也许你们中的一些人可以给我一些见解。
涉及到的表和索引就这些
表
-- submissions: each row points at one applications row (application_id,
-- removed together with it via ON DELETE CASCADE) and one transaction_names row.
CREATE TABLE swp_am_hcbe_pro.submissions
(
id bigint NOT NULL DEFAULT nextval('swp_am_hcbe_pro.submissions_id_seq'::regclass),
application_id bigint NOT NULL,
transaction_names_id bigint NOT NULL,
-- ordering column: the application_list_simple view keeps, per application_id,
-- the row with the highest "timestamp"
"timestamp" timestamp without time zone NOT NULL,
submission_status character varying(32) COLLATE pg_catalog."default" NOT NULL,
submission_type character varying(16) COLLATE pg_catalog."default" NOT NULL,
exit_code character varying(32) COLLATE pg_catalog."default",
ignore_partner_status boolean NOT NULL DEFAULT false,
ignore_sell_partner_status boolean NOT NULL DEFAULT false,
ignore_exclusion_rules boolean NOT NULL DEFAULT false,
dpa_iban character varying(32) COLLATE pg_catalog."default",
dpa_bic character varying(32) COLLATE pg_catalog."default",
dpa_id bigint,
dpa_blz bigint,
dda_iban character varying(32) COLLATE pg_catalog."default",
dda_bic character varying(32) COLLATE pg_catalog."default",
dda_id bigint,
dda_blz bigint,
dda_sepa_mandate_ref character varying(128) COLLATE pg_catalog."default",
use_different_sepa_mandate character varying(34) COLLATE pg_catalog."default",
use_manual_limit_extension boolean NOT NULL DEFAULT false,
use_automatic_limit_extension boolean NOT NULL DEFAULT false,
json_payload text COLLATE pg_catalog."default" NOT NULL,
final_timestamp timestamp without time zone,
CONSTRAINT submissions_pkey PRIMARY KEY (id),
CONSTRAINT submission_app_id FOREIGN KEY (application_id)
REFERENCES swp_am_hcbe_pro.applications (id) MATCH SIMPLE
ON UPDATE NO ACTION
ON DELETE CASCADE,
CONSTRAINT submission_transaction_names_id FOREIGN KEY (transaction_names_id)
REFERENCES swp_am_hcbe_pro.transaction_names (id) MATCH SIMPLE
ON UPDATE NO ACTION
ON DELETE NO ACTION,
-- allowed values for submission_status
CONSTRAINT chk_submission_status CHECK (submission_status::text = ANY (ARRAY['ERROR'::character varying, 'DENIED'::character varying, 'PROCESSED'::character varying, 'REJECTED'::character varying, 'PROCESSING'::character varying, 'SCHEDULED'::character varying]::text[])),
-- allowed values for submission_type
CONSTRAINT submission_types CHECK (submission_type::text = ANY (ARRAY['AUTO'::character varying, 'MANUAL'::character varying]::text[]))
)
WITH (
OIDS = FALSE
)
TABLESPACE pg_default;
-- applications: parent of submissions (referenced by submission_app_id).
-- The queries in this post filter on incoming_timestamp and sort on
-- approve_timestamp; none of the indexes listed for this table covers
-- either column.
CREATE TABLE swp_am_hcbe_pro.applications
(
id bigint NOT NULL DEFAULT nextval('swp_am_hcbe_pro.applications_id_seq'::regclass),
correlation_id character varying(64) COLLATE pg_catalog."default" NOT NULL,
incoming_timestamp timestamp without time zone NOT NULL,
source_input character varying(16) COLLATE pg_catalog."default" NOT NULL,
source_file_path character varying(255) COLLATE pg_catalog."default",
application_type character varying(127) COLLATE pg_catalog."default" NOT NULL,
loan_id bigint,
vin character varying(17) COLLATE pg_catalog."default",
cooperation_name character varying(255) COLLATE pg_catalog."default",
cooperation_id bigint,
submitter_name character varying(255) COLLATE pg_catalog."default",
submitter_id bigint,
dealer_name character varying(255) COLLATE pg_catalog."default",
dealer_id bigint,
dealer_ext_id character varying(25) COLLATE pg_catalog."default",
invoice_id character varying(25) COLLATE pg_catalog."default",
stock_id character varying(20) COLLATE pg_catalog."default",
payment_term character varying(20) COLLATE pg_catalog."default",
reg_document_id character varying(25) COLLATE pg_catalog."default",
invoice_amount numeric(20,4),
application_status character varying(64) COLLATE pg_catalog."default",
dealer_group_id bigint,
approver text COLLATE pg_catalog."default",
approve_timestamp timestamp without time zone,
payload text COLLATE pg_catalog."default" NOT NULL,
auto_resub_attempts integer NOT NULL DEFAULT 0,
row_number bigint,
email_sent boolean DEFAULT false,
modified_date timestamp(6) without time zone DEFAULT CURRENT_TIMESTAMP,
product_name text COLLATE pg_catalog."default",
priority smallint,
CONSTRAINT applications_pkey PRIMARY KEY (id),
-- allowed values for application_status (the column itself is nullable)
CONSTRAINT chk_application_status CHECK (application_status::text = ANY (ARRAY['PROCESSED'::character varying, 'PROCESSING'::character varying, 'WAIT_NEXT_SUBMISSION'::character varying, 'WAIT_MANUAL_SUBMISSION'::character varying, 'WAIT_AUTOMATIC_SUBMISSION'::character varying, 'WAIT_IN_QUEUE'::character varying, 'SUBMISSION_NOT_FOUND'::character varying, 'WAIT_FOR_ASYNC_ACTIVATION'::character varying, 'WAIT_FOR_ASYNC_SHIPMENT'::character varying, 'WAIT_FOR_BOOKING_CONFIRMATION'::character varying, 'WAIT_FOR_ACTIVATION_CONFIRMATION'::character varying, 'REJECTED'::character varying, 'NOT_IN_QUEUE'::character varying, 'SCHEDULED'::character varying]::text[])),
-- allowed values for source_input
CONSTRAINT chk_source CHECK (source_input::text = ANY (ARRAY['LM'::character varying, 'KOSYFA'::character varying, 'SWPII'::character varying, 'ADM'::character varying]::text[]))
)
WITH (
OIDS = FALSE
)
TABLESPACE pg_default;
-- transaction_names: referenced by submissions.transaction_names_id; the
-- application_list_simple view exposes name, sub_name and id from it.
-- The plans in this post report 29 rows for this table.
CREATE TABLE swp_am_hcbe_pro.transaction_names
(
id bigint NOT NULL DEFAULT nextval('swp_am_hcbe_pro.transaction_names_id_seq'::regclass),
name character varying(32) COLLATE pg_catalog."default" NOT NULL,
sub_name character varying(32) COLLATE pg_catalog."default",
CONSTRAINT transaction_names_pkey PRIMARY KEY (id)
)
WITH (
OIDS = FALSE
)
TABLESPACE pg_default;
索引
-- Matches the view's ORDER BY (application_id, "timestamp" DESC); the plans in
-- this post scan this index to compute the subm CTE.
CREATE INDEX submissions_app_id_asc_timestamp_desc_idx
ON swp_am_hcbe_pro.submissions USING btree
(application_id, "timestamp" DESC)
TABLESPACE pg_default;
-- NOTE(review): shares the leading column application_id with the index above;
-- check whether both are needed.
CREATE INDEX submissions_app_id_timestamp_trans_name_id_idx
ON swp_am_hcbe_pro.submissions USING btree
(application_id, "timestamp", transaction_names_id)
TABLESPACE pg_default;
CREATE INDEX submissions_timestamp_asc_app_id_asc_idx
ON swp_am_hcbe_pro.submissions USING btree
("timestamp", application_id)
TABLESPACE pg_default;
-- NOTE(review): correlation_id is also the leading column of
-- application_correlation_row_number_idx below; possibly redundant — confirm
-- against the workload.
CREATE INDEX application_correlation_id_idx
ON swp_am_hcbe_pro.applications USING btree
(correlation_id COLLATE pg_catalog."default")
TABLESPACE pg_default;
CREATE INDEX application_correlation_row_number_idx
ON swp_am_hcbe_pro.applications USING btree
(correlation_id COLLATE pg_catalog."default", row_number)
TABLESPACE pg_default;
CREATE INDEX applications_application_status_idx
ON swp_am_hcbe_pro.applications USING btree
(application_status COLLATE pg_catalog."default")
TABLESPACE pg_default;
CREATE INDEX applications_invoice_idx
ON swp_am_hcbe_pro.applications USING btree
(invoice_id COLLATE pg_catalog."default")
TABLESPACE pg_default;
CREATE INDEX applications_vin_idx
ON swp_am_hcbe_pro.applications USING btree
(vin COLLATE pg_catalog."default")
TABLESPACE pg_default;
-- NOTE: no index in this list covers applications.incoming_timestamp; the plans
-- in this post evaluate the date filter with a Parallel Seq Scan on applications.
我有以下视图:
-- application_list_simple: every application together with its most recent
-- submission (if any) and that submission's transaction name.
CREATE OR REPLACE VIEW swp_am_hcbe_pro.application_list_simple AS
-- subm: one row per application_id. DISTINCT ON keeps the first row of each
-- application_id group under the ORDER BY, i.e. the one with the highest
-- "timestamp".
-- NOTE: the plans in this post show this CTE computed in full (Unique over
-- ~1.7M index rows) and then read through a "CTE Scan", whatever the outer
-- date filter is.
WITH subm AS (
SELECT DISTINCT ON (s.application_id) s.application_id,
s."timestamp",
s.exit_code,
s.transaction_names_id
FROM swp_am_hcbe_pro.submissions s
ORDER BY s.application_id, s."timestamp" DESC
)
SELECT app.id,
app.correlation_id,
app.source_input,
app.source_file_path,
app.application_type,
app.loan_id,
app.vin,
app.cooperation_name,
app.cooperation_id,
app.submitter_name,
app.submitter_id,
app.dealer_id,
app.dealer_name,
app.dealer_ext_id,
app.invoice_id,
app.stock_id,
app.payment_term,
app.reg_document_id,
app.invoice_amount,
app.application_status,
app.incoming_timestamp,
app.dealer_group_id,
app.approver,
app.approve_timestamp,
subm.exit_code,
tn.name AS transaction_name,
tn.sub_name AS sub_transaction_name,
tn.id AS transaction_type_id,
subm."timestamp" AS last_submission_timestamp,
app.modified_date
FROM swp_am_hcbe_pro.applications app
-- LEFT JOINs keep applications that have no submission; the subm and tn
-- columns are NULL for them.
LEFT JOIN subm ON app.id = subm.application_id
LEFT JOIN swp_am_hcbe_pro.transaction_names tn ON tn.id = subm.transaction_names_id;
如果我运行这个语句,经过的时间是:Execution time: 2481.333 ms
-- Fast case (Execution time: 2481.333 ms in the plan that follows).
-- Both bounds are inclusive, so a row stamped exactly 2021-11-09 00:00:00
-- also matches the 2021-11-09 .. 2021-11-10 window of the slow case.
explain analyze
SELECT *, count(*) OVER () AS total FROM swp_am_hcbe_pro.application_list_simple
WHERE INCOMING_TIMESTAMP >= '2021-11-08' AND INCOMING_TIMESTAMP <= '2021-11-09'
ORDER BY APPROVE_TIMESTAMP DESC, INCOMING_TIMESTAMP DESC LIMIT 100 OFFSET 0
;
我得到了以下内容
"Limit (cost=461799.85..461800.10 rows=100 width=490) (actual time=2473.878..2474.618 rows=100 loops=1)"
" -> Sort (cost=461799.85..461803.13 rows=1311 width=490) (actual time=2473.877..2474.612 rows=100 loops=1)"
" Sort Key: app.approve_timestamp DESC, app.incoming_timestamp DESC"
" Sort Method: top-N heapsort Memory: 112kB"
" -> WindowAgg (cost=458791.38..461749.74 rows=1311 width=490) (actual time=2471.792..2473.247 rows=1620 loops=1)"
" -> Hash Left Join (cost=458791.38..461720.25 rows=1311 width=482) (actual time=2456.132..2470.895 rows=1620 loops=1)"
" Hash Cond: (subm.transaction_names_id = tn.id)"
" CTE subm"
" -> Unique (cost=0.43..333656.64 rows=129297 width=31) (actual time=0.036..1846.992 rows=645062 loops=1)"
" -> Index Scan using submissions_app_id_asc_timestamp_desc_idx on submissions s (cost=0.43..329433.26 rows=1689349 width=31) (actual time=0.033..1621.049 rows=1699582 loops=1)"
" -> Hash Right Join (cost=125133.09..128058.44 rows=1311 width=459) (actual time=2456.083..2470.337 rows=1620 loops=1)"
" Hash Cond: (subm.application_id = app.id)"
" -> CTE Scan on subm (cost=0.00..2585.94 rows=129297 width=106) (actual time=0.038..2135.256 rows=645062 loops=1)"
" -> Hash (cost=125116.71..125116.71 rows=1311 width=361) (actual time=237.582..238.310 rows=1620 loops=1)"
" Buckets: 2048 Batches: 1 Memory Usage: 483kB"
" -> Gather (cost=1000.00..125116.71 rows=1311 width=361) (actual time=11.959..236.468 rows=1620 loops=1)"
" Workers Planned: 2"
" Workers Launched: 2"
" -> Parallel Seq Scan on applications app (cost=0.00..123985.61 rows=546 width=361) (actual time=2.880..97.484 rows=540 loops=3)"
" Filter: ((incoming_timestamp >= '2021-11-08 00:00:00'::timestamp without time zone) AND (incoming_timestamp <= '2021-11-09 00:00:00'::timestamp without time zone))"
" Rows Removed by Filter: 214530"
" -> Hash (cost=1.29..1.29 rows=29 width=31) (actual time=0.033..0.033 rows=29 loops=1)"
" Buckets: 1024 Batches: 1 Memory Usage: 10kB"
" -> Seq Scan on transaction_names tn (cost=0.00..1.29 rows=29 width=31) (actual time=0.011..0.015 rows=29 loops=1)"
"Planning time: 0.587 ms"
"Execution time: 2481.333 ms"
如果我只改变日期过滤条件再运行同一条语句,则需要 Execution time: 365817.271 ms
-- Slow case (Execution time: 365817.271 ms): the same statement with the
-- window moved one day forward. The plan that follows estimates rows=1 for the
-- applications scan (actual 2140) and rescans the subm CTE once per
-- application row (loops=2140).
explain analyze
SELECT *, count(*) OVER () AS total FROM swp_am_hcbe_pro.application_list_simple
WHERE INCOMING_TIMESTAMP >= '2021-11-09' AND INCOMING_TIMESTAMP <= '2021-11-10'
ORDER BY APPROVE_TIMESTAMP DESC, INCOMING_TIMESTAMP DESC LIMIT 100 OFFSET 0
;
"Limit (cost=462844.68..462844.69 rows=1 width=490) (actual time=365809.554..365810.419 rows=100 loops=1)"
" -> Sort (cost=462844.68..462844.69 rows=1 width=490) (actual time=365809.553..365810.411 rows=100 loops=1)"
" Sort Key: app.approve_timestamp DESC, app.incoming_timestamp DESC"
" Sort Method: top-N heapsort Memory: 125kB"
" -> WindowAgg (cost=334656.77..462844.67 rows=1 width=490) (actual time=365806.595..365808.483 rows=2140 loops=1)"
" -> Nested Loop Left Join (cost=334656.77..462844.65 rows=1 width=482) (actual time=2094.856..365793.839 rows=2140 loops=1)"
" CTE subm"
" -> Unique (cost=0.43..333656.64 rows=129297 width=31) (actual time=0.036..1771.818 rows=645068 loops=1)"
" -> Index Scan using submissions_app_id_asc_timestamp_desc_idx on submissions s (cost=0.43..329433.26 rows=1689349 width=31) (actual time=0.034..1563.614 rows=1699595 loops=1)"
" -> Nested Loop Left Join (cost=1000.00..129187.86 rows=1 width=459) (actual time=2094.836..365762.361 rows=2140 loops=1)"
" Join Filter: (app.id = subm.application_id)"
" Rows Removed by Join Filter: 1380443382"
" -> Gather (cost=1000.00..124985.71 rows=1 width=361) (actual time=8.475..33.996 rows=2140 loops=1)"
" Workers Planned: 2"
" Workers Launched: 2"
" -> Parallel Seq Scan on applications app (cost=0.00..123985.61 rows=1 width=361) (actual time=1.809..103.597 rows=713 loops=3)"
" Filter: ((incoming_timestamp >= '2021-11-09 00:00:00'::timestamp without time zone) AND (incoming_timestamp <= '2021-11-10 00:00:00'::timestamp without time zone))"
" Rows Removed by Filter: 214359"
" -> CTE Scan on subm (cost=0.00..2585.94 rows=129297 width=106) (actual time=0.030..125.740 rows=645068 loops=2140)"
" -> Index Scan using transaction_names_pkey on transaction_names tn (cost=0.14..0.16 rows=1 width=31) (actual time=0.009..0.009 rows=1 loops=2140)"
" Index Cond: (id = subm.transaction_names_id)"
"Planning time: 0.414 ms"
"Execution time: 365817.271 ms"
我真的不明白为什么会这样。我还尝试过用跨越多天的日期范围(例如一周、一个月)作为过滤条件来运行查询,它们全都运行正常。
我已经对受影响的表执行了 VACUUM,尽管它们的行数并不多。我还能检查什么?
如果您需要更多信息,请随时问我
更新
如果我将查询改成下面这样,对字符串使用 to_timestamp,那么它就能正常工作。但为什么它在所有其他情况下都有效,而在这种情况下却无效?为什么问题总是出现在当前日期?
-- Workaround case (Execution time: 2751.279 ms): bounds produced by
-- to_timestamp() instead of plain literals, and without count(*) OVER ().
-- The plan that follows estimates rows=517 for applications and uses hash
-- joins again.
explain analyze
SELECT * FROM swp_am_hcbe_pro.application_list_simple
WHERE INCOMING_TIMESTAMP >= to_timestamp('2021-11-09 00:00:00','YYYY-MM-DD HH24:MI:SS')
AND INCOMING_TIMESTAMP <= to_timestamp('2021-11-10 00:00:00','YYYY-MM-DD HH24:MI:SS')
ORDER BY APPROVE_TIMESTAMP DESC, INCOMING_TIMESTAMP DESC LIMIT 100 OFFSET 0 ;
我得到以下
"Limit (cost=463151.72..463151.97 rows=100 width=481) (actual time=2743.036..2743.923 rows=100 loops=1)"
" -> Sort (cost=463151.72..463153.01 rows=517 width=481) (actual time=2743.035..2743.918 rows=100 loops=1)"
" Sort Key: app.approve_timestamp DESC, app.incoming_timestamp DESC"
" Sort Method: top-N heapsort Memory: 121kB"
" -> Hash Left Join (cost=460200.05..463126.79 rows=517 width=481) (actual time=2730.684..2741.744 rows=2382 loops=1)"
" Hash Cond: (subm.transaction_names_id = tn.id)"
" CTE subm"
" -> Unique (cost=0.43..333658.84 rows=129297 width=31) (actual time=0.020..1669.678 rows=645311 loops=1)"
" -> Index Scan using submissions_app_id_asc_timestamp_desc_idx on submissions s (cost=0.43..329435.46 rows=1689349 width=31) (actual time=0.019..1476.827 rows=1700028 loops=1)"
" -> Hash Right Join (cost=126539.56..129464.91 rows=517 width=458) (actual time=2730.642..2740.999 rows=2382 loops=1)"
" Hash Cond: (subm.application_id = app.id)"
" -> CTE Scan on subm (cost=0.00..2585.94 rows=129297 width=106) (actual time=0.023..1924.458 rows=645311 loops=1)"
" -> Hash (cost=126533.10..126533.10 rows=517 width=360) (actual time=736.655..737.534 rows=2382 loops=1)"
" Buckets: 4096 (originally 1024) Batches: 1 (originally 1) Memory Usage: 864kB"
" -> Gather (cost=1000.00..126533.10 rows=517 width=360) (actual time=18.882..734.265 rows=2382 loops=1)"
" Workers Planned: 2"
" Workers Launched: 2"
" -> Parallel Seq Scan on applications app (cost=0.00..125481.40 rows=215 width=360) (actual time=15.908..610.513 rows=794 loops=3)"
" Filter: ((incoming_timestamp >= to_timestamp('2021-11-09 00:00:00'::text, 'YYYY-MM-DD HH24:MI:SS'::text)) AND (incoming_timestamp <= to_timestamp('2021-11-10 00:00:00'::text, 'YYYY-MM-DD HH24:MI:SS'::text)))"
" Rows Removed by Filter: 214359"
" -> Hash (cost=1.29..1.29 rows=29 width=31) (actual time=0.026..0.026 rows=29 loops=1)"
" Buckets: 1024 Batches: 1 Memory Usage: 10kB"
" -> Seq Scan on transaction_names tn (cost=0.00..1.29 rows=29 width=31) (actual time=0.012..0.018 rows=29 loops=1)"
"Planning time: 0.370 ms"
"Execution time: 2751.279 ms"
那么,问题依旧
为什么这个查询需要 360 秒?
-- Literal bounds: the form reported as taking about 360 seconds.
SELECT * FROM swp_am_hcbe_pro.application_list_simple
WHERE INCOMING_TIMESTAMP >= '2021-11-09' AND INCOMING_TIMESTAMP <= '2021-11-10'
ORDER BY APPROVE_TIMESTAMP DESC, INCOMING_TIMESTAMP DESC LIMIT 100 OFFSET 0
;
但是这个需要3秒
-- to_timestamp() bounds: the form reported as taking about 3 seconds.
SELECT * FROM swp_am_hcbe_pro.application_list_simple
WHERE INCOMING_TIMESTAMP >= to_timestamp('2021-11-09 00:00:00','YYYY-MM-DD HH24:MI:SS')
AND INCOMING_TIMESTAMP <= to_timestamp('2021-11-10 00:00:00','YYYY-MM-DD HH24:MI:SS')
ORDER BY APPROVE_TIMESTAMP DESC, INCOMING_TIMESTAMP DESC LIMIT 100 OFFSET 0 ;
在其他情况下,无论我是否使用 to_timestamp,查询都能正常工作。请注意,我在上次更新中删除了 count(*) OVER (),以表明它与问题无关,所以问题仍然存在。
感谢您的支持
尝试使用 (TEMP) 视图而不是 CTE,以避免(无索引可用的)CTE 扫描 [我还用 NOT EXISTS(...) 替换了 DISTINCT ON(...)]:
-- vsubm: the newest submission of each application, written as an anti-join
-- ("no later submission exists for the same application") in a plain view
-- rather than a CTE.
-- The submissions column is named "timestamp"; it is exposed here as
-- ztimestamp so that this view's output columns stay unchanged.
-- NOTE: if several submissions of one application share the newest
-- "timestamp", every one of them is returned, whereas DISTINCT ON
-- (application_id) would keep exactly one.
-- Table names are unqualified: swp_am_hcbe_pro must be on the search_path.
CREATE OR REPLACE VIEW vsubm AS
SELECT
    s.application_id
  , s."timestamp" AS ztimestamp
  , s.exit_code
  , s.transaction_names_id
FROM submissions s
WHERE NOT EXISTS (
    -- a strictly later submission of the same application disqualifies s
    SELECT *
    FROM submissions nx
    WHERE nx.application_id = s.application_id
      AND nx."timestamp" > s."timestamp"
)
;
-- application_list_simple2: same output columns as application_list_simple,
-- but the latest submission is taken from the vsubm view instead of a CTE.
-- Applications without a submission are kept; their submission and
-- transaction-name columns are NULL.
CREATE OR REPLACE VIEW application_list_simple2 AS
SELECT
    a.id,
    a.correlation_id,
    a.source_input,
    a.source_file_path,
    a.application_type,
    a.loan_id,
    a.vin,
    a.cooperation_name,
    a.cooperation_id,
    a.submitter_name,
    a.submitter_id,
    a.dealer_id,
    a.dealer_name,
    a.dealer_ext_id,
    a.invoice_id,
    a.stock_id,
    a.payment_term,
    a.reg_document_id,
    a.invoice_amount,
    a.application_status,
    a.incoming_timestamp   AS incoming_timestamp,
    a.dealer_group_id,
    a.approver,
    a.approve_timestamp    AS approve_timestamp,
    last_sub.exit_code,
    t.name                 AS transaction_name,
    t.sub_name             AS sub_transaction_name,
    t.id                   AS transaction_type_id,
    last_sub.ztimestamp    AS last_submission_timestamp,
    a.modified_date
FROM applications AS a
LEFT JOIN vsubm AS last_sub
    ON last_sub.application_id = a.id
LEFT JOIN transaction_names AS t
    ON t.id = last_sub.transaction_names_id
;
-- Test query against application_list_simple2 for the one-day window starting
-- 2021-11-08. The range is half-open (>= start, < next day), so a row stamped
-- exactly at midnight belongs to one day only.
-- Prefix the statement with EXPLAIN or EXPLAIN ANALYZE to inspect the plan.
-- The question's query additionally selected count(*) OVER () AS total and
-- paged with LIMIT 100 OFFSET 0; both are left out here on purpose.
SELECT *
FROM application_list_simple2
WHERE incoming_timestamp >= '2021-11-08'
  AND incoming_timestamp <  '2021-11-09'
ORDER BY approve_timestamp DESC, incoming_timestamp DESC
;
关于观察到的行为:
- 选择位于时间范围边缘的日期区间,可能会生成与位于中间的时间跨度不同的计划。
- 今天的记录的统计信息可能还不完整(统计信息收集器可能有滞后)。
- 糟糕的计划(大量哈希连接和顺序扫描)可能是由于缺少统计信息、缺少索引,或 random_page_cost 设置得过高造成的。
- 这些表的行相当宽。也许需要做一些规范化,特别是对于 applications 表。
- 混用带时区和不带时区的时间戳可能会造成一些混乱。[一般建议:始终使用带时区的时间戳]