如何在 table 上合并行并更新 postgres 上的联结 table
How to merge rows on a table and update junction table on postgres
考虑具有多对多关系的 2 tables(table A
和 table B
),每个关系包含一个主键和其他属性。要映射此关系,有第三个关节 table (table C
),其中包含关系 (fk_tableA | fk_tableB
) 的每个 table 的外键。
Table B
包含重复的行(pk 除外),所以我想将这些行合并到一个具有任何唯一主键的记录中,就像这样:
table B table B (after merging duplicates)
1 | Henry | 100.0 1 | Henry | 100.0
2 | Jessi | 97.0 2 | Jessi | 97.0
3 | Henry | 100.0 4 | Erica | 11.2
4 | Erica | 11.2
通过合并这些记录,table C
(联合table)的外键可能指向不再存在的table B
的主键。我的目标是编辑它们以指向合并的记录:
合并前:
tableA table B table C
id | att1 id | att1 | att2 fk_A | fk_b
----------- ------------------- ------------
1 | ab123 1 | Henry | 100.0 1 | 1
2 | adawd 2 | Jessi | 97.0 2 | 3
3 | da3wf 3 | Henry | 100.0
4 | Erica | 11.2
在 table C
上,引用了来自 table B
的 2 条记录(1 和 3),它们恰好是重复的行。我的目标是将它们合并成一条记录(在 table B
中)并更新 table C
:
中的外键
合并后:
tableA table B table C
id | att1 id | att1 | att2 fk_A | fk_b
----------- ------------------- ------------
1 | ab123 1 | Henry | 100.0 1 | 1
2 | adawd 2 | Jessi | 97.0 2 | 1
3 | da3wf 4 | Erica | 11.2
- Note that id=3 was merged/deleted from table B and the same id
was updated on table C to point to the merged record's id.
所以我的问题基本上是 如何在合并 table?[=43= 的记录时更新联结点 table ] 我目前正在使用 Postgres 并处理数百万数据。
-- \i tmp.sql
CREATE TABLE persons
( id integer primary key
, name text
, weight decimal(4,1)
);
INSERT INTO persons(id,name,weight)VALUES
(1 ,'Henry', 100.0)
,(2 ,'Jessi', 97.0)
,(3 ,'Henry', 100.0)
,(4 ,'Erica', 11.)
;
CREATE TABLE junctiontab
( fk_A integer NOT NULL
, p_id integer REFERENCES persons(id)
, PRIMARY KEY (fk_A,p_id)
);
INSERT INTO junctiontab(fk_A, p_id)VALUES (1 , 1 ),(2 , 3 );
-- find the ids of affected persons.
-- [for simplicity: put them in a temp table]
CREATE TEMP table xlat AS
SELECT * FROM(
SELECt id AS wrong_id
,min(id) OVER (PARTITION BY name ORDER BY id) AS good_id
FROM persons p
) x
WHERE good_id <> wrong_id
;
--show it
SELECT *FROM xlat;
UPDATE junctiontab j
SET p_id = x.good_id
FROM xlat x
WHERE j.p_id = x.wrong_id
-- The good junction-entry *could* already exist...
AND NOT EXISTS (
SELECT *FROM junctiontab nx
WHERE nx.fk_A= j.fk_A
AND nx.p_id= x.good_id
)
;
DELETE FROM junctiontab d
-- if the good junction-entry already existed, we can delete the wrong one now.
WHERE EXISTS (
SELECT *FROM junctiontab g
JOIN xlat x ON g.p_id= x.good_id
AND d.p_id = x.wrong_id
WHERE g.fk_A= d.fk_A
)
;
--show it
SELECT *FROM junctiontab
;
-- Delete thewrongperson-records
DELETE FROM persons p
WHERE EXISTS (
SELECT *FROM xlat x
WHERE p.id = x.wrong_id
);
--show it
SELECT * FROM persons p;
结果:
DROP SCHEMA
CREATE SCHEMA
SET
CREATE TABLE
INSERT 0 4
CREATE TABLE
INSERT 0 2
SELECT 1
wrong_id | good_id
----------+---------
3 | 1
(1 row)
UPDATE 1
DELETE 0
fk_a | p_id
------+------
1 | 1
2 | 1
(2 rows)
DELETE 1
id | name | weight
----+-------+--------
1 | Henry | 100.0
2 | Jessi | 97.0
4 | Erica | 11.0
(3 rows)
考虑具有多对多关系的 2 tables(table A
和 table B
),每个关系包含一个主键和其他属性。要映射此关系,有第三个关节 table (table C
),其中包含关系 (fk_tableA | fk_tableB
) 的每个 table 的外键。
Table B
包含重复的行(pk 除外),所以我想将这些行合并到一个具有任何唯一主键的记录中,就像这样:
table B table B (after merging duplicates)
1 | Henry | 100.0 1 | Henry | 100.0
2 | Jessi | 97.0 2 | Jessi | 97.0
3 | Henry | 100.0 4 | Erica | 11.2
4 | Erica | 11.2
通过合并这些记录,table C
(联合table)的外键可能指向不再存在的table B
的主键。我的目标是编辑它们以指向合并的记录:
合并前:
tableA table B table C
id | att1 id | att1 | att2 fk_A | fk_b
----------- ------------------- ------------
1 | ab123 1 | Henry | 100.0 1 | 1
2 | adawd 2 | Jessi | 97.0 2 | 3
3 | da3wf 3 | Henry | 100.0
4 | Erica | 11.2
在 table C
上,引用了来自 table B
的 2 条记录(1 和 3),它们恰好是重复的行。我的目标是将它们合并成一条记录(在 table B
中)并更新 table C
:
合并后:
tableA table B table C
id | att1 id | att1 | att2 fk_A | fk_b
----------- ------------------- ------------
1 | ab123 1 | Henry | 100.0 1 | 1
2 | adawd 2 | Jessi | 97.0 2 | 1
3 | da3wf 4 | Erica | 11.2
- Note that id=3 was merged/deleted from table B and the same id
was updated on table C to point to the merged record's id.
所以我的问题基本上是 如何在合并 table?[=43= 的记录时更新联结点 table ] 我目前正在使用 Postgres 并处理数百万数据。
-- \i tmp.sql
CREATE TABLE persons
( id integer primary key
, name text
, weight decimal(4,1)
);
INSERT INTO persons(id,name,weight)VALUES
(1 ,'Henry', 100.0)
,(2 ,'Jessi', 97.0)
,(3 ,'Henry', 100.0)
,(4 ,'Erica', 11.)
;
CREATE TABLE junctiontab
( fk_A integer NOT NULL
, p_id integer REFERENCES persons(id)
, PRIMARY KEY (fk_A,p_id)
);
INSERT INTO junctiontab(fk_A, p_id)VALUES (1 , 1 ),(2 , 3 );
-- find the ids of affected persons.
-- [for simplicity: put them in a temp table]
CREATE TEMP table xlat AS
SELECT * FROM(
SELECt id AS wrong_id
,min(id) OVER (PARTITION BY name ORDER BY id) AS good_id
FROM persons p
) x
WHERE good_id <> wrong_id
;
--show it
SELECT *FROM xlat;
UPDATE junctiontab j
SET p_id = x.good_id
FROM xlat x
WHERE j.p_id = x.wrong_id
-- The good junction-entry *could* already exist...
AND NOT EXISTS (
SELECT *FROM junctiontab nx
WHERE nx.fk_A= j.fk_A
AND nx.p_id= x.good_id
)
;
DELETE FROM junctiontab d
-- if the good junction-entry already existed, we can delete the wrong one now.
WHERE EXISTS (
SELECT *FROM junctiontab g
JOIN xlat x ON g.p_id= x.good_id
AND d.p_id = x.wrong_id
WHERE g.fk_A= d.fk_A
)
;
--show it
SELECT *FROM junctiontab
;
-- Delete thewrongperson-records
DELETE FROM persons p
WHERE EXISTS (
SELECT *FROM xlat x
WHERE p.id = x.wrong_id
);
--show it
SELECT * FROM persons p;
结果:
DROP SCHEMA
CREATE SCHEMA
SET
CREATE TABLE
INSERT 0 4
CREATE TABLE
INSERT 0 2
SELECT 1
wrong_id | good_id
----------+---------
3 | 1
(1 row)
UPDATE 1
DELETE 0
fk_a | p_id
------+------
1 | 1
2 | 1
(2 rows)
DELETE 1
id | name | weight
----+-------+--------
1 | Henry | 100.0
2 | Jessi | 97.0
4 | Erica | 11.0
(3 rows)