如何使用多列中的值对表进行分布?
How do I distribute a table using values from multiple columns?
根据 Citus 文档,按单列对表进行分布很容易:
-- Distribute github_events on the single column created_at, using the 'append' method.
SELECT master_create_distributed_table('github_events', 'created_at', 'append');
有没有办法按多列对表进行分布?例如,类似于:
-- Hypothetical call from the question: two comma-separated column names are
-- passed as the distribution column. Citus does not support this form.
SELECT master_create_distributed_table('github_events', 'user_id,site_id', 'append');
Citus 不支持按多列分布。但是,您可以创建一个复合类型,并按该复合类型对数据进行分区。
-- 以下内联了链接中的内容,以防链接失效 --
复合类型的散列分区步骤
在主节点和所有工作节点上创建类型:
-- Composite type bundling the two values the table will be distributed on.
-- It has to be created on the master node and on every worker node.
CREATE TYPE new_composite_type AS (
    project_key text,
    date        text
);
创建一个检查相等性的函数,并将其与新类型的相等运算符相关联
-- Equality test for new_composite_type: two values are equal when both their
-- project_key and their date fields are equal.
--
-- The arguments are unnamed, so the body refers to them positionally:
-- $1 is the left value and $2 the right value. (A bare ".project_key" with no
-- argument reference in front of it is not valid SQL.)
--
-- IMMUTABLE: the result depends only on the two arguments.
-- RETURNS NULL ON NULL INPUT: if either argument is NULL the function yields
-- NULL without evaluating the body.
CREATE FUNCTION equal_test_composite_type_function(new_composite_type, new_composite_type) RETURNS boolean
AS 'select $1.project_key = $2.project_key AND $1.date = $2.date;'
LANGUAGE SQL
IMMUTABLE
RETURNS NULL ON NULL INPUT;
-- Register "=" for new_composite_type on top of
-- equal_test_composite_type_function.
-- HASHES declares that the operator can be used for hash joins.
CREATE OPERATOR = (
    LEFTARG   = new_composite_type,
    RIGHTARG  = new_composite_type,
    PROCEDURE = equal_test_composite_type_function,
    HASHES
);
创建一个新的哈希函数。
注意:这只是一个简单的示例,可能无法提供良好的均匀散列分布。有几个很好的散列函数示例,可以在单独的 C 函数中实现,而不是 SQL.
-- Hash function for new_composite_type: concatenates the project_key and date
-- fields of the argument and hashes the resulting text with hashtext().
--
-- $1 is the positional reference to the single (unnamed) argument; the field
-- selections must be written as $1.project_key / $1.date.
--
-- This is deliberately simple: the fields are joined without a separator, so
-- distinct values can produce the same concatenated text, and the hash
-- distribution is not guaranteed to be uniform.
--
-- IMMUTABLE: the result depends only on the argument.
-- RETURNS NULL ON NULL INPUT: a NULL argument yields NULL without evaluating
-- the body.
CREATE FUNCTION new_composite_type_hash(new_composite_type) RETURNS int
AS 'SELECT hashtext( ($1.project_key || $1.date)::text);'
LANGUAGE SQL
IMMUTABLE
RETURNS NULL ON NULL INPUT;
为 BTREE 和 HASH 访问方法定义运算符类:
-- Default B-tree operator class for new_composite_type.
-- Only the equality operator is registered (B-tree strategy number 3).
CREATE OPERATOR CLASS new_op_fam_btree_class
    DEFAULT FOR TYPE new_composite_type
    USING BTREE AS
        OPERATOR 3 = (new_composite_type, new_composite_type);

-- Default hash operator class for new_composite_type.
-- Strategy 1 is equality; support function 1 supplies the hash value.
CREATE OPERATOR CLASS new_op_fam_hash_class
    DEFAULT FOR TYPE new_composite_type
    USING HASH AS
        OPERATOR 1 = (new_composite_type, new_composite_type),
        FUNCTION 1 new_composite_type_hash(new_composite_type);
使用新类型创建表并对其进行分布。
-- Table whose distribution column is of the composite type.
CREATE TABLE composite_type_partitioned_table (
    id               integer,
    composite_column new_composite_type
);

-- Hash-distribute the table on composite_column.
SELECT master_create_distributed_table(
    'composite_type_partitioned_table',
    'composite_column',
    'hash'
);

-- Create the worker shards: 4 shards, replication factor 1.
SELECT master_create_worker_shards(
    'composite_type_partitioned_table',
    4,
    1
);
运行 INSERT 和 SELECT。请注意,要正确进行分片修剪(pruning),需要采用这些查询中所示的引号写法。
-- Load four sample rows: two project keys, two dates each.
INSERT INTO composite_type_partitioned_table VALUES (1, '("key1","20160101")'::new_composite_type);
INSERT INTO composite_type_partitioned_table VALUES (2, '("key1","20160102")'::new_composite_type);
INSERT INTO composite_type_partitioned_table VALUES (3, '("key2","20160101")'::new_composite_type);
INSERT INTO composite_type_partitioned_table VALUES (4, '("key2","20160102")'::new_composite_type);

-- Look up / modify rows by the full composite value.
-- Whitespace inside the parentheses of a composite literal is part of the
-- field value, so the literal must be spelled exactly like the inserted one:
-- '("key1", "20160101")' would carry a leading space in the date field and
-- would neither compare equal to nor hash like the stored rows.
SELECT * FROM composite_type_partitioned_table WHERE composite_column = '("key1","20160101")'::new_composite_type;
UPDATE composite_type_partitioned_table SET id = 6 WHERE composite_column = '("key2","20160101")'::new_composite_type;
SELECT * FROM composite_type_partitioned_table WHERE composite_column = '("key2","20160101")'::new_composite_type;
其他说明:
有两点需要注意:
输入文件必须正确分隔,copy_to_distributed_table 才能正常工作。为此,可使用 COPY (SELECT ()::composite_type_field, .... );
将数据从普通表导出到一个文件,然后再加载该文件。
为了使用 select 查询进行修剪,复合类型字段应该用引号引起来。
根据 Citus 文档,按单列对表进行分布很容易:
-- Distribute github_events on the single column created_at, using the 'append' method.
SELECT master_create_distributed_table('github_events', 'created_at', 'append');
有没有办法按多列对表进行分布?例如,类似于:
-- Hypothetical call from the question: two comma-separated column names are
-- passed as the distribution column. Citus does not support this form.
SELECT master_create_distributed_table('github_events', 'user_id,site_id', 'append');
Citus 不支持按多列分布。但是,您可以创建一个复合类型,并按该复合类型对数据进行分区。
-- link 的内容在下面内联,以防 link 失效 --
复合类型的散列分区步骤
在主节点和所有工作节点上创建类型:
-- Composite type bundling the two values the table will be distributed on.
-- It has to be created on the master node and on every worker node.
CREATE TYPE new_composite_type AS (
    project_key text,
    date        text
);
创建一个检查相等性的函数,并将其与新类型的相等运算符相关联
-- Equality test for new_composite_type: two values are equal when both their
-- project_key and their date fields are equal.
-- The arguments are unnamed, so the body refers to them positionally as $1
-- (left value) and $2 (right value).
-- IMMUTABLE: the result depends only on the two arguments.
-- RETURNS NULL ON NULL INPUT: a NULL argument yields NULL without evaluating
-- the body.
CREATE FUNCTION equal_test_composite_type_function(new_composite_type, new_composite_type) RETURNS boolean
AS 'select $1.project_key = $2.project_key AND $1.date = $2.date;'
LANGUAGE SQL
IMMUTABLE
RETURNS NULL ON NULL INPUT;

-- ... use that function to create a custom equality operator...
-- (Kept on its own line: a "--" comment runs to the end of the line, so the
-- CREATE OPERATOR statement must not share a line with it.)
-- HASHES declares that the operator can be used for hash joins.
CREATE OPERATOR = (
    LEFTARG   = new_composite_type,
    RIGHTARG  = new_composite_type,
    PROCEDURE = equal_test_composite_type_function,
    HASHES
);
创建一个新的哈希函数。
注意:这只是一个简单的示例,可能无法提供良好的均匀散列分布。有几个很好的散列函数示例,可以在单独的 C 函数中实现,而不是 SQL.
-- Hash function for new_composite_type: concatenates the project_key and date
-- fields of the argument and hashes the resulting text with hashtext().
-- $1 is the positional reference to the single (unnamed) argument.
-- The fields are joined without a separator, so this is only a simple example
-- and does not guarantee a uniform hash distribution.
-- IMMUTABLE: the result depends only on the argument.
-- RETURNS NULL ON NULL INPUT: a NULL argument yields NULL without evaluating
-- the body.
CREATE FUNCTION new_composite_type_hash(new_composite_type) RETURNS int
AS 'SELECT hashtext( ($1.project_key || $1.date)::text);'
LANGUAGE SQL
IMMUTABLE
RETURNS NULL ON NULL INPUT;
为 BTREE 和 HASH 访问方法定义运算符类:
-- Default B-tree operator class for new_composite_type.
-- Only the equality operator is registered (B-tree strategy number 3).
CREATE OPERATOR CLASS new_op_fam_btree_class
    DEFAULT FOR TYPE new_composite_type
    USING BTREE AS
        OPERATOR 3 = (new_composite_type, new_composite_type);

-- Default hash operator class for new_composite_type.
-- Strategy 1 is equality; support function 1 supplies the hash value.
CREATE OPERATOR CLASS new_op_fam_hash_class
    DEFAULT FOR TYPE new_composite_type
    USING HASH AS
        OPERATOR 1 = (new_composite_type, new_composite_type),
        FUNCTION 1 new_composite_type_hash(new_composite_type);
使用新类型创建表并对其进行分布。
-- Table whose distribution column is of the composite type.
CREATE TABLE composite_type_partitioned_table (
    id               integer,
    composite_column new_composite_type
);

-- Hash-distribute the table on composite_column.
SELECT master_create_distributed_table(
    'composite_type_partitioned_table',
    'composite_column',
    'hash'
);

-- Create the worker shards: 4 shards, replication factor 1.
SELECT master_create_worker_shards(
    'composite_type_partitioned_table',
    4,
    1
);
运行 INSERT 和 SELECT。请注意,要正确进行分片修剪(pruning),需要采用这些查询中所示的引号写法。
-- Load four sample rows: two project keys, two dates each.
INSERT INTO composite_type_partitioned_table VALUES (1, '("key1","20160101")'::new_composite_type);
INSERT INTO composite_type_partitioned_table VALUES (2, '("key1","20160102")'::new_composite_type);
INSERT INTO composite_type_partitioned_table VALUES (3, '("key2","20160101")'::new_composite_type);
INSERT INTO composite_type_partitioned_table VALUES (4, '("key2","20160102")'::new_composite_type);

-- Look up / modify rows by the full composite value.
-- Whitespace inside the parentheses of a composite literal is part of the
-- field value, so the literal must be spelled exactly like the inserted one:
-- '("key1", "20160101")' would carry a leading space in the date field and
-- would neither compare equal to nor hash like the stored rows.
SELECT * FROM composite_type_partitioned_table WHERE composite_column = '("key1","20160101")'::new_composite_type;
UPDATE composite_type_partitioned_table SET id = 6 WHERE composite_column = '("key2","20160101")'::new_composite_type;
SELECT * FROM composite_type_partitioned_table WHERE composite_column = '("key2","20160101")'::new_composite_type;
其他说明:
有两点需要注意:
输入文件必须正确分隔,copy_to_distributed_table 才能正常工作。为此,可使用
COPY (SELECT ()::composite_type_field, .... );
将数据从普通表导出到一个文件,然后再加载该文件。为了在 SELECT 查询中进行分片修剪,复合类型字段应该用引号引起来。