使用临时表删除重复行及其在其他表中的依赖项

Delete duplicate rows and their dependencies in other tables by using temporary tables

我有一个产品表,其中包含重复的行,而这些行的唯一 Id 又被其他 6 个表引用。

我想删除产品表中的这些重复行,以及其他表中依赖于这些产品的行。

我考虑使用临时表来完成这项工作。

我有这个想法,但是不知道怎么去实现。

我也不确定选择临时表是否正确。

感谢您的帮助和建议。

-- Demo table used to illustrate the clean-up.
-- id is an auto-generated surrogate key; the pair (colA, colB) is assumed to
-- be the combination that should identify one entity.
CREATE TABLE dbo.hasduplicates (
    id              int IDENTITY(1, 1),
    colA            varchar(10),   -- entity key, part 1
    colB            int,           -- entity key, part 2
    someOtherColumn varchar(40)    -- non-key payload
);


-- Seed data: three (colA, colB) groups, each containing more than one row.
INSERT INTO dbo.hasduplicates (colA, colB, someOtherColumn)
VALUES
    -- group (A, 1): three rows
    ('A', 1, 'A1 - 1'),
    ('A', 1, 'A1 - 2'),
    ('A', 1, 'A1 - 3'),
    -- group (A, 2): two rows
    ('A', 2, 'A2 - 1'),
    ('A', 2, 'A2 - 2'),
    -- group (B, 1): three rows
    ('B', 1, 'B1 - 1'),
    ('B', 1, 'B1 - 2'),
    ('B', 1, 'B1 - 3');


-- Show the table content before any clean-up.
SELECT
    h.id,
    h.colA,
    h.colB,
    h.someOtherColumn
FROM dbo.hasduplicates AS h;


-- Temp table holding the ids of the duplicate rows that are to be deleted.
CREATE TABLE #ToBedeleted (IdToDelete int);

-- Number the rows inside every (colA, colB) group by ascending id.
-- Row 1 of each group is kept; every later row is recorded for deletion.
INSERT INTO #ToBedeleted (IdToDelete)
SELECT ranked.id
FROM (
    SELECT
        h.id,
        ROW_NUMBER() OVER (
            PARTITION BY h.colA, h.colB   -- <-- the columns of your entity go here
            ORDER BY h.id
        ) AS RowNum
    FROM dbo.hasduplicates AS h
) AS ranked
WHERE ranked.RowNum > 1;

-- Inspect the collected ids before deleting anything.
SELECT d.IdToDelete
FROM #ToBedeleted AS d;

-- Template for the actual clean-up; it is left commented out on purpose.
-- Order matters: the referencing tables are emptied of the affected rows
-- first, the table holding the duplicates comes last.
-- NOTE(review): Table1..Table6 and the Table1Id..Table6Id columns are
-- placeholders; presumably each should be the column that references
-- dbo.hasduplicates.id — confirm against the real schema before running.
--cleanup the referencing tables
/*
DELETE FROM dbo.Table1 WHERE Table1Id IN (SELECT IdToDelete FROM #ToBedeleted);
DELETE FROM dbo.Table2 WHERE Table2Id IN (SELECT IdToDelete FROM #ToBedeleted);
.............
DELETE FROM dbo.Table6 WHERE Table6Id IN (SELECT IdToDelete FROM #ToBedeleted);
--finally cleanup your products table
DELETE FROM dbo.hasduplicates WHERE Id IN (SELECT IdToDelete FROM #ToBedeleted);
*/

-- Tear down the demo objects.
-- The "--/*" ... "--*/" pair is a toggle: delete the leading "--" of the
-- opening marker to turn both DROP statements into a block comment.
--/*
DROP TABLE #ToBedeleted;
DROP TABLE dbo.hasduplicates;
--*/

另一种方法是把重复行的 id 存入一个变量,然后根据这些 id 删除重复的记录。(假设 ProductRefTable 是引用产品表的表,ProductId 是其中的外键。)

  -- Sample product table: ID is the identity primary key, Value is the
  -- column in which duplicates are looked for.
  CREATE TABLE Product (
      ID    INT IDENTITY(1, 1) NOT NULL,
      Value INT,
      CONSTRAINT PK_ID PRIMARY KEY (ID)
  );


    -- Sample rows: the value 3 occurs twice and the value 5 three times.
    INSERT INTO Product ([Value])
    VALUES
        (1), (2), (3), (4),
        (5), (5), (3), (5);

-- Collect the ids of all duplicate rows as one comma-separated list.
-- A row counts as a duplicate when it is not the lowest ID of its Value group.
DECLARE @DupIDS varchar(max) = '';

-- CAST to varchar(max): with a plain int argument STRING_AGG returns a
-- non-max string type and raises an error once the list exceeds 8000 bytes.
SELECT @DupIDS = STRING_AGG(CAST(ID AS varchar(max)), ',')
FROM Product
WHERE ID NOT IN (SELECT MIN(ID)
                 FROM Product
                 GROUP BY Value);

-- Remove the referencing rows first: ProductRefTable.ProductId is a foreign
-- key to Product, so the parent rows cannot be deleted while children exist.
DELETE FROM ProductRefTable
WHERE ProductId IN (SELECT CAST(value AS int)
                    FROM STRING_SPLIT(@DupIDS, ','));

-- Now the duplicate products themselves can go.
DELETE FROM Product
WHERE ID IN (SELECT CAST(value AS int)
             FROM STRING_SPLIT(@DupIDS, ','));

假设重复的产品具有相同的 product.name

如果有更多的重复标准,则相应地调整 EXISTS 中的标准。

-- Work list of the product ids that are going to be removed.
CREATE TABLE #tmpProductsToDelete (product_id int PRIMARY KEY);

-- A product is queued when another product with the same name and a higher id
-- exists; in other words, only the highest id per name is left out of the list.
INSERT INTO #tmpProductsToDelete (product_id)
SELECT candidate.id
FROM dbo.Product AS candidate
WHERE EXISTS (
    SELECT 1
    FROM dbo.Product AS newer
    WHERE newer.name = candidate.name
      -- further duplicate criteria can be enabled here:
      -- AND newer.colA = candidate.colA
      -- AND newer.colB = candidate.colB
      AND newer.id > candidate.id
);

然后仔细检查这些是否是要删除的产品。

-- Review the full product rows that are queued for deletion.
SELECT p.*
FROM dbo.Product AS p
WHERE EXISTS (
    SELECT 1
    FROM #tmpProductsToDelete AS d
    WHERE d.product_id = p.id
);

也许应该先把这些重复行备份到 Product 的副本表中;对那 6 个带有指向产品表外键的表也做同样的备份。

-- Create the backup table for products, but only if it does not exist yet.
-- TOP 0 copies the column layout of dbo.Product without copying any rows and
-- appends a RemoveOn column for the time of removal.
-- The second branch (with NULL for RemoveOn) exists only to shape the new
-- table; NOTE(review): presumably it makes RemoveOn nullable and keeps the
-- IDENTITY property from being copied — confirm against the SELECT ... INTO docs.
-- UNION ALL is used (as for the other copy tables) because plain UNION forces
-- a duplicate-elimination step that fails on non-comparable column types.
IF OBJECT_ID('dbo.cpyProduct', 'U') IS NULL
BEGIN
  SELECT TOP 0 *, GetDate() as RemoveOn
  INTO dbo.cpyProduct FROM dbo.Product
  UNION ALL
  SELECT TOP 0 *, NULL FROM dbo.Product;
END;

-- Back up every product that is queued for deletion, stamped with the
-- current date and time.
INSERT INTO dbo.cpyProduct
SELECT
    p.*,
    GETDATE() AS RemoveOn
FROM dbo.Product AS p
WHERE EXISTS (
    SELECT 1
    FROM #tmpProductsToDelete AS d
    WHERE d.product_id = p.id
);

-- Backup of dbo.Table1: create the copy table on first use ...
IF OBJECT_ID('dbo.cpyTable1', 'U') IS NULL
BEGIN
    -- TOP 0: take over the column layout only, plus a RemoveOn column.
    SELECT TOP 0
        *,
        GETDATE() AS RemoveOn
    INTO dbo.cpyTable1
    FROM dbo.Table1
    UNION ALL
    SELECT TOP 0
        *,
        NULL
    FROM dbo.Table1;
END;

-- ... then copy the rows that belong to the products queued for deletion.
INSERT INTO dbo.cpyTable1
SELECT
    t.*,
    GETDATE() AS RemoveOn
FROM dbo.Table1 AS t
WHERE EXISTS (
    SELECT 1
    FROM #tmpProductsToDelete AS d
    WHERE d.product_id = t.product_id
);

-- Backup of dbo.Table2: create the copy table on first use ...
IF OBJECT_ID('dbo.cpyTable2', 'U') IS NULL
BEGIN
    -- TOP 0: take over the column layout only, plus a RemoveOn column.
    SELECT TOP 0
        *,
        GETDATE() AS RemoveOn
    INTO dbo.cpyTable2
    FROM dbo.Table2
    UNION ALL
    SELECT TOP 0
        *,
        NULL
    FROM dbo.Table2;
END;

-- ... then copy the rows that belong to the products queued for deletion.
INSERT INTO dbo.cpyTable2
SELECT
    t.*,
    GETDATE() AS RemoveOn
FROM dbo.Table2 AS t
WHERE EXISTS (
    SELECT 1
    FROM #tmpProductsToDelete AS d
    WHERE d.product_id = t.product_id
);

-- Rinse & repeat for the other 4 tables

-- Inspect what has been backed up so far.
SELECT cp.* FROM dbo.cpyProduct AS cp;
SELECT c1.* FROM dbo.cpyTable1 AS c1;
SELECT c2.* FROM dbo.cpyTable2 AS c2;

然后清理。

--
-- Delete the queued products: first from the 6 tables with a FK to the
-- products table, then from the products table itself.
--
-- All deletes run in a single transaction so that a failure part-way through
-- cannot leave some tables cleaned and others untouched.
--

-- With XACT_ABORT ON a run-time error rolls the whole transaction back.
-- The setting stays in effect for the rest of the session.
SET XACT_ABORT ON;

BEGIN TRANSACTION;

delete from dbo.Table1
where product_id in (select product_id from #tmpProductsToDelete);

delete from dbo.Table2
where product_id in (select product_id from #tmpProductsToDelete);

delete from dbo.Table3
where product_id in (select product_id from #tmpProductsToDelete);

delete from dbo.Table4
where product_id in (select product_id from #tmpProductsToDelete);

delete from dbo.Table5
where product_id in (select product_id from #tmpProductsToDelete);

delete from dbo.Table6
where product_id in (select product_id from #tmpProductsToDelete);

-- remove the dups from the base table (last, once nothing references them)
delete from dbo.Product
where id in (select product_id from #tmpProductsToDelete);

COMMIT TRANSACTION;

可以在 rextester 上进行测试。