Why does a partitioned table use a nested loop join in Greenplum, while a non-partitioned table uses a hash join?
I created two tables (A and B) with 100 columns each, using the same DDL except that B is partitioned:
CREATE TABLE A (
id integer, ......, col integer,
CONSTRAINT A_pkey PRIMARY KEY (id))
WITH (OIDS = FALSE)
TABLESPACE pg_default
DISTRIBUTED BY (id);
CREATE TABLE B (
id integer, ......, col integer,
CONSTRAINT B_pkey PRIMARY KEY (id))
WITH (OIDS = FALSE)
TABLESPACE pg_default
DISTRIBUTED BY (id)
PARTITION BY RANGE(id)
(START (1) END (2100000) EVERY (500000),
DEFAULT PARTITION extra
);
I loaded the same data (2,000,000 rows) into both A and B, then ran the following SQL against each of them:
UPDATE A a SET col = c.col FROM C c WHERE c.id = a.id;
UPDATE B b SET col = c.col FROM C c WHERE c.id = b.id;
The update on A finished in about a minute, but the one on B ran for a very long time and eventually failed with a memory error:
ERROR: Canceling query because of high VMEM usage.
So I checked the EXPLAIN output for both statements and found that A uses a Hash Join while B uses a Nested Loop join.
Is there a reason a partitioned table would get a nested loop join? Should table partitioning be avoided in Greenplum when storing only a few million rows?
You are doing several things that are not recommended, which may explain why you are seeing a nested loop join:
- Generally avoid UPDATE statements. The old version of each row is kept on disk alongside the new version, so if you update an entire table you effectively double the physical disk space it uses. (A common alternative is sketched after this list.)
- I have never seen heap tables used for partitioned tables. In Greenplum you should mostly use append-only tables, especially for larger tables such as partitioned ones.
- You are partitioning by the distribution key. This is not recommended and provides no benefit at all. Do you really intend to filter by a range of IDs? That would be unusual; if so, change the distribution key to something else.
- I believe Pivotal disabled the ability to create primary keys on partitioned tables; at one time it was simply not allowed. I would discourage creating any primary keys at all, since they only take up space and the optimizer typically does not use them.
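For the first point, a common alternative to a large UPDATE in Greenplum is to write the result once with CREATE TABLE AS and then swap the tables. This is only a sketch against the small a/c tables used in the repro below; the name a_new is made up here, and the storage options should match whatever you actually use:
-- Sketch: build the updated rows as a brand-new append-only table instead of updating in place.
CREATE TABLE a_new
WITH (appendonly=true) AS
SELECT a.id,
       coalesce(c.col, a.col) AS col,  -- take c's value where an id matches, otherwise keep a's
       a.mydate
FROM a
LEFT JOIN c ON c.id = a.id
DISTRIBUTED BY (id);
-- Swap the new table in for the old one.
ALTER TABLE a RENAME TO a_old;
ALTER TABLE a_new RENAME TO a;
DROP TABLE a_old;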
After fixing these items, I cannot reproduce your nested loop issue. I am also using version 5.0.0.
drop table if exists a;
drop table if exists b;
drop table if exists c;
CREATE TABLE A
(id integer, col integer, mydate timestamp)
WITH (appendonly=true)
DISTRIBUTED BY (id);
CREATE TABLE B
(id integer, col integer, mydate timestamp)
WITH (appendonly=true)
DISTRIBUTED BY (id)
PARTITION BY RANGE(mydate)
(START ('2015-01-01'::timestamp) END ('2018-12-31'::timestamp) EVERY ('1 month'::interval),
DEFAULT PARTITION extra
);
create table c
(id integer, col integer, mydate timestamp)
distributed by (id);
insert into a
select i, i+10, '2015-01-01'::timestamp + '1 day'::interval*i
from generate_series(0, 2000) as i
where '2015-01-01'::timestamp + '1 day'::interval*i < '2019-01-01'::timestamp;
insert into b
select i, i+10, '2015-01-01'::timestamp + '1 day'::interval*i
from generate_series(0, 2000) as i
where '2015-01-01'::timestamp + '1 day'::interval*i < '2019-01-01'::timestamp;
insert into c
select i, i+10, '2015-01-01'::timestamp + '1 day'::interval*i
from generate_series(0, 2000) as i
where '2015-01-01'::timestamp + '1 day'::interval*i < '2019-01-01'::timestamp;
explain UPDATE A a SET col = c.col from C c where c.id = a.id;
/*
"Update (cost=0.00..862.13 rows=1 width=1)"
" -> Result (cost=0.00..862.00 rows=1 width=34)"
" -> Split (cost=0.00..862.00 rows=1 width=30)"
" -> Hash Join (cost=0.00..862.00 rows=1 width=30)"
" Hash Cond: public.a.id = c.id"
" -> Table Scan on a (cost=0.00..431.00 rows=1 width=26)"
" -> Hash (cost=431.00..431.00 rows=1 width=8)"
" -> Table Scan on c (cost=0.00..431.00 rows=1 width=8)"
"Settings: optimizer_join_arity_for_associativity_commutativity=18"
"Optimizer status: PQO version 2.42.0"
*/
explain UPDATE B b SET col = c.col from C c where c.id = b.id;
/*
"Update (cost=0.00..862.13 rows=1 width=1)"
" -> Result (cost=0.00..862.00 rows=1 width=34)"
" -> Split (cost=0.00..862.00 rows=1 width=30)"
" -> Hash Join (cost=0.00..862.00 rows=1 width=30)"
" Hash Cond: public.a.id = c.id"
" -> Table Scan on a (cost=0.00..431.00 rows=1 width=26)"
" -> Hash (cost=431.00..431.00 rows=1 width=8)"
" -> Table Scan on c (cost=0.00..431.00 rows=1 width=8)"
"Settings: optimizer_join_arity_for_associativity_commutativity=18"
"Optimizer status: PQO version 2.42.0"
*/
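If a nested loop still shows up after those changes, it can help to confirm which optimizer produced the plan and, purely as a diagnostic, re-run EXPLAIN with the other one. The settings below are standard Greenplum GUCs (optimizer toggles GPORCA; enable_nestloop only affects the legacy Postgres planner), but treat this as a troubleshooting sketch, not a fix:
-- Check whether GPORCA (on) or the legacy Postgres planner (off) is in use.
SHOW optimizer;
-- Try the legacy planner for this session and compare the plan.
SET optimizer = off;
EXPLAIN UPDATE B b SET col = c.col FROM C c WHERE c.id = b.id;
-- With the legacy planner, discourage nested loops to see whether the plan changes.
SET enable_nestloop = off;
EXPLAIN UPDATE B b SET col = c.col FROM C c WHERE c.id = b.id;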