PostgreSQL 表按时间戳范围分区时,因时间戳不唯一导致唯一键冲突
Postgres SQL Table Partitioning by Range Timestamp not Unique key Collision
我在尝试修改现有的 PostgreSQL(版本 13.3)表以支持分区时遇到了问题:从旧表向新表插入数据时卡住了,因为在某些情况下插入的时间戳可能不唯一,导致执行失败。
分区迫使我将主键创建为 range (timestamp) 值。
您可以在下面看到新表的定义:
-- Partitioned parent table, range-partitioned on "CreationDate".
-- The primary key covers only "CreationDate", so no two rows may share
-- the same timestamp.
CREATE TABLE "UserFavorites_master" (
    "Id"           integer GENERATED BY DEFAULT AS IDENTITY NOT NULL,
    "UserId"       integer NOT NULL,
    "CardId"       integer NOT NULL,
    "CreationDate" timestamp without time zone NOT NULL,
    CONSTRAINT "PK_UserFavorites_CreationDate"
        PRIMARY KEY ("CreationDate")
)
PARTITION BY RANGE ("CreationDate");
最初的 table 没有对时间戳的唯一性或主键的约束,我们也不会特别想要它,但这似乎是分区的要求。寻找解决问题的替代方案或好主意。
完整代码如下:
-- Move the existing table out of the way; its rows are copied back later.
ALTER TABLE "UserFavorites" RENAME TO "UserFavorites_old";

-- Partitioned parent table, range-partitioned on "CreationDate".
-- The primary key covers only "CreationDate", so no two rows may share
-- the same timestamp.
CREATE TABLE "UserFavorites_master" (
    "Id"           integer GENERATED BY DEFAULT AS IDENTITY NOT NULL,
    "UserId"       integer NOT NULL,
    "CardId"       integer NOT NULL,
    "CreationDate" timestamp without time zone NOT NULL,
    CONSTRAINT "PK_UserFavorites_CreationDate"
        PRIMARY KEY ("CreationDate")
)
PARTITION BY RANGE ("CreationDate");
-- From reference:
-- Creates the yearly partition of "UserFavorites_master" that covers forDate,
-- unless a relation with that partition's name already exists.
--
-- Parameters:
--   forDate  any timestamp inside the year whose partition is wanted.
-- Returns: nothing (void).
create or replace function createPartitionIfNotExists(forDate timestamp) returns void
as $body$
declare
    yearStart date := date_trunc('year', forDate);
    -- Upper bound is exclusive: the first day of the following year.
    yearEndExclusive date := yearStart + interval '1 year';
    tableName text := 'UserFavorites_Partition_' || to_char(forDate, 'YYYY');
begin
    -- to_regclass() parses its argument like an identifier in SQL text, so an
    -- unquoted mixed-case name is folded to lower case and would never match
    -- the partition that %I creates below. Quote the name so the existence
    -- check actually finds a previously created partition.
    if to_regclass(quote_ident(tableName)) is null then
        execute format(
            'create table %I partition of "UserFavorites_master" for values from (%L) to (%L)',
            tableName, yearStart, yearEndExclusive
        );
        -- No per-partition index is created here.
        -- NOTE(review): on PostgreSQL 11 and later an index created on the
        -- partitioned parent is propagated to its partitions, so per-partition
        -- index DDL should not be needed — confirm for the target version.
    end if;
end;
$body$ language plpgsql;
-- Pre-create one yearly partition for every year from 2015 through 2030
-- (both bounds inclusive).
do
$$
begin
    -- An integer FOR loop declares its own loop variable, so no DECLARE
    -- section is needed. The loop is closed by its own END LOOP.
    for partitionYear in 2015..2030 loop
        -- to_date(..., 'yyyy') yields January 1st of that year.
        perform createPartitionIfNotExists(to_date(partitionYear::varchar, 'yyyy'));
    end loop;
end
$$;
-- View carrying the name the original table had before it was renamed,
-- reading from the partitioned table. Columns are listed explicitly so the
-- view does not depend on the column order of "UserFavorites_master".
create or replace view "UserFavorites" as
select
    "Id",
    "UserId",
    "CardId",
    "CreationDate"
from "UserFavorites_master";

-- Copy the existing rows into the partitioned table through the view.
-- Source columns are named explicitly instead of relying on positional
-- matching with "select *".
-- NOTE(review): assumes "UserFavorites_old" has these four column names — confirm.
-- NOTE(review): explicit "Id" values do not advance the identity sequence of a
-- GENERATED BY DEFAULT column; it may need to be reset afterwards — confirm.
insert into "UserFavorites" ("Id", "UserId", "CardId", "CreationDate")
select
    "Id",
    "UserId",
    "CardId",
    "CreationDate"
from "UserFavorites_old";
它在最后一行失败并出现以下错误:
SQL Error [23505]: ERROR: duplicate key value violates unique constraint "UserFavorites_Partition_2020_pkey"
Detail: Key ("CreationDate")=(2020-11-02 09:38:54.997) already exists.
ERROR: duplicate key value violates unique constraint "UserFavorites_Partition_2020_pkey"
Detail: Key ("CreationDate")=(2020-11-02 09:38:54.997) already exists.
ERROR: duplicate key value violates unique constraint "UserFavorites_Partition_2020_pkey"
Detail: Key ("CreationDate")=(2020-11-02 09:38:54.997) already exists.
不,分区不会强制您创建主键。只需省略该行,您的示例应该可以工作。
但是,您绝对应该始终在表上设置主键。否则,您可能会得到相同的行,这在关系数据库中是一个令人头疼的问题。您可能需要清理数据。
@Laurenz Albe 说得对。另外,我发现也可以用多个列组成主键,不过这可能会影响性能(参见 "Multiple Keys Performance" 一文);甚至仅仅为分区所用的创建日期列建立索引,似乎也会使性能变差。
下面是使用多列主键的示例,实际效果可能因情况而异。
-- Partitioned parent table, range-partitioned on "CreationDate".
-- The composite primary key pairs "Id" with the partition column, so rows
-- may share a timestamp as long as their "Id" differs.
CREATE TABLE "UserFavorites_master" (
    "Id"           integer GENERATED BY DEFAULT AS IDENTITY NOT NULL,
    "UserId"       integer NOT NULL,
    "CardId"       integer NOT NULL,
    "CreationDate" timestamp without time zone NOT NULL,
    CONSTRAINT "PK_UserFavorites"
        PRIMARY KEY ("Id", "CreationDate")
)
PARTITION BY RANGE ("CreationDate");
我在尝试修改现有的 PostgreSQL(版本 13.3)表以支持分区时遇到了问题:从旧表向新表插入数据时卡住了,因为在某些情况下插入的时间戳可能不唯一,导致执行失败。
分区迫使我将主键创建为 range (timestamp) 值。
您可以在下面看到新表的定义:
-- Partitioned parent table, range-partitioned on "CreationDate".
-- The primary key covers only "CreationDate", so no two rows may share
-- the same timestamp.
CREATE TABLE "UserFavorites_master" (
    "Id"           integer GENERATED BY DEFAULT AS IDENTITY NOT NULL,
    "UserId"       integer NOT NULL,
    "CardId"       integer NOT NULL,
    "CreationDate" timestamp without time zone NOT NULL,
    CONSTRAINT "PK_UserFavorites_CreationDate"
        PRIMARY KEY ("CreationDate")
)
PARTITION BY RANGE ("CreationDate");
最初的 table 没有对时间戳的唯一性或主键的约束,我们也不会特别想要它,但这似乎是分区的要求。寻找解决问题的替代方案或好主意。
完整代码如下:
-- Move the existing table out of the way; its rows are copied back later.
ALTER TABLE "UserFavorites" RENAME TO "UserFavorites_old";

-- Partitioned parent table, range-partitioned on "CreationDate".
-- The primary key covers only "CreationDate", so no two rows may share
-- the same timestamp.
CREATE TABLE "UserFavorites_master" (
    "Id"           integer GENERATED BY DEFAULT AS IDENTITY NOT NULL,
    "UserId"       integer NOT NULL,
    "CardId"       integer NOT NULL,
    "CreationDate" timestamp without time zone NOT NULL,
    CONSTRAINT "PK_UserFavorites_CreationDate"
        PRIMARY KEY ("CreationDate")
)
PARTITION BY RANGE ("CreationDate");
-- From reference:
-- Creates the yearly partition of "UserFavorites_master" that covers forDate,
-- unless a relation with that partition's name already exists.
--
-- Parameters:
--   forDate  any timestamp inside the year whose partition is wanted.
-- Returns: nothing (void).
create or replace function createPartitionIfNotExists(forDate timestamp) returns void
as $body$
declare
    yearStart date := date_trunc('year', forDate);
    -- Upper bound is exclusive: the first day of the following year.
    yearEndExclusive date := yearStart + interval '1 year';
    tableName text := 'UserFavorites_Partition_' || to_char(forDate, 'YYYY');
begin
    -- to_regclass() parses its argument like an identifier in SQL text, so an
    -- unquoted mixed-case name is folded to lower case and would never match
    -- the partition that %I creates below. Quote the name so the existence
    -- check actually finds a previously created partition.
    if to_regclass(quote_ident(tableName)) is null then
        execute format(
            'create table %I partition of "UserFavorites_master" for values from (%L) to (%L)',
            tableName, yearStart, yearEndExclusive
        );
        -- No per-partition index is created here.
        -- NOTE(review): on PostgreSQL 11 and later an index created on the
        -- partitioned parent is propagated to its partitions, so per-partition
        -- index DDL should not be needed — confirm for the target version.
    end if;
end;
$body$ language plpgsql;
-- Pre-create one yearly partition for every year from 2015 through 2030
-- (both bounds inclusive).
do
$$
begin
    -- An integer FOR loop declares its own loop variable, so no DECLARE
    -- section is needed. The loop is closed by its own END LOOP.
    for partitionYear in 2015..2030 loop
        -- to_date(..., 'yyyy') yields January 1st of that year.
        perform createPartitionIfNotExists(to_date(partitionYear::varchar, 'yyyy'));
    end loop;
end
$$;
-- View carrying the name the original table had before it was renamed,
-- reading from the partitioned table. Columns are listed explicitly so the
-- view does not depend on the column order of "UserFavorites_master".
create or replace view "UserFavorites" as
select
    "Id",
    "UserId",
    "CardId",
    "CreationDate"
from "UserFavorites_master";

-- Copy the existing rows into the partitioned table through the view.
-- Source columns are named explicitly instead of relying on positional
-- matching with "select *".
-- NOTE(review): assumes "UserFavorites_old" has these four column names — confirm.
-- NOTE(review): explicit "Id" values do not advance the identity sequence of a
-- GENERATED BY DEFAULT column; it may need to be reset afterwards — confirm.
insert into "UserFavorites" ("Id", "UserId", "CardId", "CreationDate")
select
    "Id",
    "UserId",
    "CardId",
    "CreationDate"
from "UserFavorites_old";
它在最后一行失败并出现以下错误:
SQL Error [23505]: ERROR: duplicate key value violates unique constraint "UserFavorites_Partition_2020_pkey"
Detail: Key ("CreationDate")=(2020-11-02 09:38:54.997) already exists.
ERROR: duplicate key value violates unique constraint "UserFavorites_Partition_2020_pkey"
Detail: Key ("CreationDate")=(2020-11-02 09:38:54.997) already exists.
ERROR: duplicate key value violates unique constraint "UserFavorites_Partition_2020_pkey"
Detail: Key ("CreationDate")=(2020-11-02 09:38:54.997) already exists.
不,分区不会强制您创建主键。只需省略该行,您的示例应该可以工作。
但是,您绝对应该始终在表上设置主键。否则,您可能会得到相同的行,这在关系数据库中是一个令人头疼的问题。您可能需要清理数据。
@Laurenz Albe 说得对。另外,我发现也可以用多个列组成主键,不过这可能会影响性能(参见 "Multiple Keys Performance" 一文);甚至仅仅为分区所用的创建日期列建立索引,似乎也会使性能变差。
下面是使用多列主键的示例,实际效果可能因情况而异。
-- Partitioned parent table, range-partitioned on "CreationDate".
-- The composite primary key pairs "Id" with the partition column, so rows
-- may share a timestamp as long as their "Id" differs.
CREATE TABLE "UserFavorites_master" (
    "Id"           integer GENERATED BY DEFAULT AS IDENTITY NOT NULL,
    "UserId"       integer NOT NULL,
    "CardId"       integer NOT NULL,
    "CreationDate" timestamp without time zone NOT NULL,
    CONSTRAINT "PK_UserFavorites"
        PRIMARY KEY ("Id", "CreationDate")
)
PARTITION BY RANGE ("CreationDate");