优化两个数百万行表之间的内部连接
optimize an inner join between two multi-million row tables
我是 Postgres 的新手,对 EXPLAIN 的工作原理还比较陌生。下面是一个典型的查询,我只是替换了其中的日期:
-- Show the planner's chosen plan; EXPLAIN without ANALYZE does not run the query.
-- The query totals market_value per account / security / date for one account
-- on one date, counting only rows whose created_by is 'HoldingLoad'.
-- NOTE(review): there is no predicate on mvh.owning_object_type, which is the
-- leading column of both indexes defined on market_value_history in this file.
EXPLAIN
SELECT
    hc.account_id,
    hc.security_id,
    mvh.market_value_date,
    SUM(mvh.market_value) AS market_value
FROM market_value_history AS mvh
INNER JOIN holding_cust AS hc
    ON hc.id = mvh.owning_object_id
WHERE hc.account_id = 24766
  AND mvh.market_value_date = '2015-07-02'
  AND mvh.created_by = 'HoldingLoad'
GROUP BY
    hc.account_id,
    hc.security_id,
    mvh.market_value_date
ORDER BY
    hc.security_id,
    mvh.market_value_date;
附上 EXPLAIN 输出的截图。
holding_cust 表约有 200 万行,market_value_history 表约有 1.63 亿行。
下面是 market_value_history 和 holding_cust 两张表的定义和索引:
如果您能给我任何关于调整此查询的建议,我将不胜感激。
-- Table: public.market_value_history
-- Surrogate key "id" is filled from the market_value_id_seq sequence.
-- owning_object_id is a plain integer; no foreign key is declared on it.
CREATE TABLE public.market_value_history (
    id                  integer       NOT NULL DEFAULT nextval('market_value_id_seq'::regclass),
    market_value        numeric(18,6) NOT NULL,
    market_value_date   date,                    -- the only nullable column
    holding_type        varchar(25)   NOT NULL,
    owning_object_type  varchar(25)   NOT NULL,
    owning_object_id    integer       NOT NULL,
    created_by          varchar(50)   NOT NULL,
    created_dt          timestamp     NOT NULL,  -- timestamp without time zone
    last_modified_dt    timestamp     NOT NULL,  -- timestamp without time zone
    CONSTRAINT market_value_history_pkey PRIMARY KEY (id)
)
WITH (OIDS = FALSE);

ALTER TABLE public.market_value_history OWNER TO postgres;

-- Index: public.ix_market_value_history_id
-- Non-unique b-tree on (owning_object_type, owning_object_id).
-- NOTE(review): these columns are the leading prefix of the unique index
-- ix_market_value_history_object_type_date defined just below.
CREATE INDEX ix_market_value_history_id
    ON public.market_value_history
    USING btree (owning_object_type, owning_object_id);

-- Index: public.ix_market_value_history_object_type_date
-- Unique b-tree: at most one row per
-- (owning_object_type, owning_object_id, holding_type, market_value_date).
-- market_value_date is the last key column, not a leading one.
CREATE UNIQUE INDEX ix_market_value_history_object_type_date
    ON public.market_value_history
    USING btree (owning_object_type, owning_object_id, holding_type, market_value_date);
-- Table: public.holding_cust
-- Surrogate key "id" is filled from the holding_cust_id_seq sequence.
CREATE TABLE public.holding_cust (
    id                 integer       NOT NULL DEFAULT nextval('holding_cust_id_seq'::regclass),
    account_id         integer       NOT NULL,
    security_id        integer       NOT NULL,
    subaccount_type    integer,
    trade_date         date,
    purchase_date      date,
    quantity           numeric(18,6),
    net_cost           numeric(18,2),
    adjusted_net_cost  numeric(18,2),
    open_date          date,
    close_date         date,
    created_by         varchar(50)   NOT NULL,
    created_dt         timestamp     NOT NULL,  -- timestamp without time zone
    last_modified_dt   timestamp     NOT NULL,  -- timestamp without time zone
    CONSTRAINT holding_cust_pkey PRIMARY KEY (id)
)
WITH (OIDS = FALSE);

ALTER TABLE public.holding_cust OWNER TO postgres;

-- Index: public.ix_holding_cust_account_id
-- Single-column b-tree on account_id.
-- NOTE(review): account_id is also the leading column of
-- ix_holding_cust_acctid_secid_asofdt defined just below.
CREATE INDEX ix_holding_cust_account_id
    ON public.holding_cust
    USING btree (account_id);

-- Index: public.ix_holding_cust_acctid_secid_asofdt
-- Composite b-tree on account, then security, then trade_date descending.
CREATE INDEX ix_holding_cust_acctid_secid_asofdt
    ON public.holding_cust
    USING btree (account_id, security_id, trade_date DESC);

-- Index: public.ix_holding_cust_security_id
-- Single-column b-tree on security_id.
CREATE INDEX ix_holding_cust_security_id
    ON public.holding_cust
    USING btree (security_id);

-- Index: public.ix_holding_cust_trade_date
-- Single-column b-tree on trade_date.
CREATE INDEX ix_holding_cust_trade_date
    ON public.holding_cust
    USING btree (trade_date);
两件事:
- 正如 Dmitry 指出的那样,您应该考虑在 market_value_date 字段上创建索引。创建索引之后,您可能会得到一个完全不同的查询计划,它可能会、也可能不会带来其他瓶颈,但肯定能消除这次顺序扫描(Seq Scan)。
- 其次(这一点比较次要,因为我怀疑它是否会影响性能):如果字段长度限制并不是设计上的硬性要求,您可以考虑把 created_by 字段的类型改为 TEXT。从查询计划中可以看出,执行此查询时会把每一行的 created_by 值都转换为 TEXT。
我是 Postgres 的新手,对 EXPLAIN 的工作原理还比较陌生。下面是一个典型的查询,我只是替换了其中的日期:
-- Show the planner's chosen plan; EXPLAIN without ANALYZE does not run the query.
-- The query totals market_value per account / security / date for one account
-- on one date, counting only rows whose created_by is 'HoldingLoad'.
-- NOTE(review): there is no predicate on mvh.owning_object_type, which is the
-- leading column of both indexes defined on market_value_history in this file.
EXPLAIN
SELECT
    hc.account_id,
    hc.security_id,
    mvh.market_value_date,
    SUM(mvh.market_value) AS market_value
FROM market_value_history AS mvh
INNER JOIN holding_cust AS hc
    ON hc.id = mvh.owning_object_id
WHERE hc.account_id = 24766
  AND mvh.market_value_date = '2015-07-02'
  AND mvh.created_by = 'HoldingLoad'
GROUP BY
    hc.account_id,
    hc.security_id,
    mvh.market_value_date
ORDER BY
    hc.security_id,
    mvh.market_value_date;
附上 EXPLAIN 输出的截图。
下面是 market_value_history 和 holding_cust 两张表的定义和索引:
如果您能给我任何关于调整此查询的建议,我将不胜感激。
-- Table: public.market_value_history
-- Surrogate key "id" is filled from the market_value_id_seq sequence.
-- owning_object_id is a plain integer; no foreign key is declared on it.
CREATE TABLE public.market_value_history (
    id                  integer       NOT NULL DEFAULT nextval('market_value_id_seq'::regclass),
    market_value        numeric(18,6) NOT NULL,
    market_value_date   date,                    -- the only nullable column
    holding_type        varchar(25)   NOT NULL,
    owning_object_type  varchar(25)   NOT NULL,
    owning_object_id    integer       NOT NULL,
    created_by          varchar(50)   NOT NULL,
    created_dt          timestamp     NOT NULL,  -- timestamp without time zone
    last_modified_dt    timestamp     NOT NULL,  -- timestamp without time zone
    CONSTRAINT market_value_history_pkey PRIMARY KEY (id)
)
WITH (OIDS = FALSE);

ALTER TABLE public.market_value_history OWNER TO postgres;

-- Index: public.ix_market_value_history_id
-- Non-unique b-tree on (owning_object_type, owning_object_id).
-- NOTE(review): these columns are the leading prefix of the unique index
-- ix_market_value_history_object_type_date defined just below.
CREATE INDEX ix_market_value_history_id
    ON public.market_value_history
    USING btree (owning_object_type, owning_object_id);

-- Index: public.ix_market_value_history_object_type_date
-- Unique b-tree: at most one row per
-- (owning_object_type, owning_object_id, holding_type, market_value_date).
-- market_value_date is the last key column, not a leading one.
CREATE UNIQUE INDEX ix_market_value_history_object_type_date
    ON public.market_value_history
    USING btree (owning_object_type, owning_object_id, holding_type, market_value_date);
-- Table: public.holding_cust
-- Surrogate key "id" is filled from the holding_cust_id_seq sequence.
CREATE TABLE public.holding_cust (
    id                 integer       NOT NULL DEFAULT nextval('holding_cust_id_seq'::regclass),
    account_id         integer       NOT NULL,
    security_id        integer       NOT NULL,
    subaccount_type    integer,
    trade_date         date,
    purchase_date      date,
    quantity           numeric(18,6),
    net_cost           numeric(18,2),
    adjusted_net_cost  numeric(18,2),
    open_date          date,
    close_date         date,
    created_by         varchar(50)   NOT NULL,
    created_dt         timestamp     NOT NULL,  -- timestamp without time zone
    last_modified_dt   timestamp     NOT NULL,  -- timestamp without time zone
    CONSTRAINT holding_cust_pkey PRIMARY KEY (id)
)
WITH (OIDS = FALSE);

ALTER TABLE public.holding_cust OWNER TO postgres;

-- Index: public.ix_holding_cust_account_id
-- Single-column b-tree on account_id.
-- NOTE(review): account_id is also the leading column of
-- ix_holding_cust_acctid_secid_asofdt defined just below.
CREATE INDEX ix_holding_cust_account_id
    ON public.holding_cust
    USING btree (account_id);

-- Index: public.ix_holding_cust_acctid_secid_asofdt
-- Composite b-tree on account, then security, then trade_date descending.
CREATE INDEX ix_holding_cust_acctid_secid_asofdt
    ON public.holding_cust
    USING btree (account_id, security_id, trade_date DESC);

-- Index: public.ix_holding_cust_security_id
-- Single-column b-tree on security_id.
CREATE INDEX ix_holding_cust_security_id
    ON public.holding_cust
    USING btree (security_id);

-- Index: public.ix_holding_cust_trade_date
-- Single-column b-tree on trade_date.
CREATE INDEX ix_holding_cust_trade_date
    ON public.holding_cust
    USING btree (trade_date);
两件事:
- 正如 Dmitry 指出的那样,您应该考虑在 market_value_date 字段上创建索引。创建索引之后,您可能会得到一个完全不同的查询计划,它可能会、也可能不会带来其他瓶颈,但肯定能消除这次顺序扫描(Seq Scan)。
- 其次(这一点比较次要,因为我怀疑它是否会影响性能):如果字段长度限制并不是设计上的硬性要求,您可以考虑把 created_by 字段的类型改为 TEXT。从查询计划中可以看出,执行此查询时会把每一行的 created_by 值都转换为 TEXT。