从 Table 中删除重复的 ID - 性能改进
Delete repeated Ids from Table - Performance Improvement
我有一个含有重复代码的 table,需要清理它:删除重复的行,但每个代码在 table 中至少要保留一行。
我的table是这样的:
FriendlyFunctionCode MemberFirmId FunctionLevel3Desc
1 Value1 Value2
1 Value2 Value3
2 Value4 Value5
我需要这样的东西:(剩下哪一行并不重要,只要至少有一行即可)
FriendlyFunctionCode MemberFirmId FunctionLevel3Desc
1 Value1 Value2
2 Value4 Value5
我有这个查询,但性能很糟糕
-- Asker's original row-by-row approach (the slow version the answers replace).
-- Step 1: collect every (MemberFirmId, FriendlyFunctionCode) pair that occurs
-- more than once into the temp table #ToDeleteRepeated.
-- NOTE(review): the grouping uses both columns, but the sample rows for code 1
-- have different MemberFirmId values, so they would not be reported here.
SELECT MemberFirmId, FriendlyFunctionCode
INTO #ToDeleteRepeated
FROM [dbo].[FirmFunction]
GROUP BY MemberFirmId, FriendlyFunctionCode
HAVING COUNT(1) > 1
-- Scratch variables: the code currently being processed and one of its descriptions.
DECLARE @Code VARCHAR(100), @Desc VARCHAR(250)
-- Step 2: loop while #ToDeleteRepeated still has rows.
-- NOTE(review): nothing in this snippet removes rows from #ToDeleteRepeated,
-- so once this condition is true it stays true.
WHILE ((SELECT COUNT(1) FROM #ToDeleteRepeated) > 0)
BEGIN
-- Pick one duplicated code (TOP 1 without ORDER BY: an arbitrary one).
SELECT TOP 1 @Code = FriendlyFunctionCode FROM #ToDeleteRepeated
-- NOTE(review): this inner loop runs until no row is left for @Code, so it
-- also deletes the one row that was supposed to be kept.
WHILE ((SELECT COUNT(1) FROM [FirmFunction] WHERE FriendlyFunctionCode = @Code) > 0)
BEGIN
-- Pick one description for the code, then delete every row with that code and description.
-- NOTE(review): presumably FunctionLevel3Desc is never NULL; a NULL value
-- would not match "= @Desc" and the row could not be deleted - confirm.
SELECT TOP 1 @Desc = FunctionLevel3Desc FROM [FirmFunction] WHERE FriendlyFunctionCode = @Code
DELETE FROM [FirmFunction] WHERE FriendlyFunctionCode = @Code AND FunctionLevel3Desc = @Desc
END
END
有什么建议吗?
您可以像这样使用窗口函数,不必使用游标(游标在 SQL Server 中性能不佳)。您可以单独运行内层的 SELECT,查看它是如何分配行号的。
测试数据
-- Test fixture: a temp table seeded with the three sample rows from the
-- question (code 1 appears twice, code 2 once).
CREATE TABLE #TestData
(
     FriendlyFunctionCode int
    ,MemberFirmId         nvarchar(10)
    ,FunctionLevel3Desc   nvarchar(10)
);

-- Explicit column list so the insert does not depend on the table's column order.
INSERT INTO #TestData (FriendlyFunctionCode, MemberFirmId, FunctionLevel3Desc)
VALUES
     (1, 'Value1', 'Value2')
    ,(1, 'Value2', 'Value3')
    ,(2, 'Value4', 'Value5');
查询
-- Keep one row per FriendlyFunctionCode: number the rows inside each code,
-- stash the rows numbered 1 in #SavedData, then reload #TestData from it.
;WITH NumberedRows AS
(
    SELECT
         src.FriendlyFunctionCode
        ,src.MemberFirmId
        ,src.FunctionLevel3Desc
         -- The ORDER BY repeats the partition key, so the order inside a
         -- partition is unspecified and the row numbered 1 is arbitrary.
        ,ROW_NUMBER() OVER (PARTITION BY src.FriendlyFunctionCode
                            ORDER BY src.FriendlyFunctionCode) AS RowNum
    FROM #TestData AS src
)
SELECT
     kept.FriendlyFunctionCode
    ,kept.MemberFirmId
    ,kept.FunctionLevel3Desc
INTO #SavedData
FROM NumberedRows AS kept
WHERE kept.RowNum = 1;

-- Empty the table and put back only the surviving rows.
TRUNCATE TABLE #TestData;

INSERT INTO #TestData (FriendlyFunctionCode, MemberFirmId, FunctionLevel3Desc)
SELECT
     saved.FriendlyFunctionCode
    ,saved.MemberFirmId
    ,saved.FunctionLevel3Desc
FROM #SavedData AS saved;

-- The staging copy is no longer needed.
DROP TABLE #SavedData;
结果
FriendlyFunctionCode MemberFirmId FunctionLevel3Desc
1 Value1 Value2
2 Value4 Value5
-- Delete every duplicate in one set-based statement: number the rows within
-- each FriendlyFunctionCode and remove all rows numbered above 1.
-- The leading semicolon terminates any preceding statement, which T-SQL
-- requires before the WITH keyword of a common table expression.
;WITH CTE AS
(
    SELECT
        FriendlyFunctionCode,
        -- The ORDER BY repeats the partition key, so the numbering inside a
        -- group is arbitrary and the surviving row is not deterministic.
        ROW_NUMBER() OVER (PARTITION BY FriendlyFunctionCode
                           ORDER BY FriendlyFunctionCode) AS RN
    FROM [dbo].[FirmFunction]
)
DELETE FROM CTE
WHERE RN > 1;
您也可以只按 FriendlyFunctionCode 分组(GROUP BY),并对其余各列使用 MAX。
-- Collapse FirmFunction to one row per FriendlyFunctionCode and stage the
-- result in the temp table #StagingTable.
-- NOTE: each MAX() is evaluated independently, so the staged MemberFirmId and
-- FunctionLevel3Desc of a code may come from two different source rows.
SELECT
    FriendlyFunctionCode,
    MAX(MemberFirmId)       AS MemberFirmId,
    MAX(FunctionLevel3Desc) AS FunctionLevel3Desc
INTO #StagingTable
FROM FirmFunction
GROUP BY FriendlyFunctionCode;
然后截断(TRUNCATE)原来的 table,再把上面 SELECT 的结果插回去……或者干脆新建一个 table,把这些去重后(取最大值)的记录插入其中。
-- Replace the contents of FirmFunction with the de-duplicated rows staged in
-- #StagingTable. Both steps run inside one transaction so that a failed
-- reload rolls the TRUNCATE back instead of leaving the table empty.
-- XACT_ABORT ON makes a run-time error roll back the whole transaction; the
-- setting stays in effect for the rest of the session.
SET XACT_ABORT ON;

BEGIN TRANSACTION;

TRUNCATE TABLE FirmFunction;

-- Columns are matched by position: #StagingTable holds the code, the member
-- firm id and the description, in that order.
INSERT INTO FirmFunction (FriendlyFunctionCode, MemberFirmId, FunctionLevel3Desc)
SELECT *
FROM #StagingTable;

COMMIT TRANSACTION;
这不如创建一个 table FirmFunction2 安全,例如使用与原始架构相同的架构,然后插入其中,然后重命名....
-- Safer variant: build the de-duplicated copy in a new table and leave the
-- original untouched until the result has been checked.

-- WHERE 1 = 0 selects no rows, so this only creates FirmFunction2 with the
-- same columns as FirmFunction. SELECT ... INTO does not copy indexes or
-- constraints; recreate those separately if they are needed.
SELECT *
INTO FirmFunction2
FROM FirmFunction
WHERE 1 = 0;

-- Load one row per FriendlyFunctionCode straight into the new table.
-- NOTE: each MAX() is evaluated independently, so the two values of a code
-- may come from two different source rows.
INSERT INTO FirmFunction2 (FriendlyFunctionCode, MemberFirmId, FunctionLevel3Desc)
SELECT
    FriendlyFunctionCode,
    MAX(MemberFirmId)       AS MemberFirmId,
    MAX(FunctionLevel3Desc) AS FunctionLevel3Desc
FROM FirmFunction
GROUP BY FriendlyFunctionCode;
然后您可以检查 FirmFunction2 中的数据,如果满意……就删除原来的 table,再把 FirmFunction2 重命名为原来的名称。
通过 row_number()
使用 CTE 删除
-- Number the rows of each friendlyfunctioncode in memberfirmid order and
-- delete everything after the first, keeping the row that sorts first.
;WITH ranked AS
(
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY friendlyfunctioncode
                           ORDER BY memberfirmid) AS rn
    FROM deletingtable
)
DELETE FROM ranked
WHERE rn > 1;
这将按照以下执行计划执行:
Table/Clustered Index Scan(表/聚集索引扫描)--> Sort(排序,没有合适的索引时才需要)--> Segment(分段)--> Sequence Project(序列投影)--> Filter(筛选),然后 Delete(删除)。
如果 FriendlyFunctionCode 上有合适的索引,就可以省去排序,只需一次扫描,执行得更快。
我有一个含有重复代码的 table,需要清理它:删除重复的行,但每个代码在 table 中至少要保留一行。
我的table是这样的:
FriendlyFunctionCode MemberFirmId FunctionLevel3Desc
1 Value1 Value2
1 Value2 Value3
2 Value4 Value5
我需要这样的东西:(剩下哪一行并不重要,只要至少有一行即可)
FriendlyFunctionCode MemberFirmId FunctionLevel3Desc
1 Value1 Value2
2 Value4 Value5
我有这个查询,但性能很糟糕
-- Asker's original row-by-row approach (the slow version the answers replace).
-- Step 1: collect every (MemberFirmId, FriendlyFunctionCode) pair that occurs
-- more than once into the temp table #ToDeleteRepeated.
-- NOTE(review): the grouping uses both columns, but the sample rows for code 1
-- have different MemberFirmId values, so they would not be reported here.
SELECT MemberFirmId, FriendlyFunctionCode
INTO #ToDeleteRepeated
FROM [dbo].[FirmFunction]
GROUP BY MemberFirmId, FriendlyFunctionCode
HAVING COUNT(1) > 1
-- Scratch variables: the code currently being processed and one of its descriptions.
DECLARE @Code VARCHAR(100), @Desc VARCHAR(250)
-- Step 2: loop while #ToDeleteRepeated still has rows.
-- NOTE(review): nothing in this snippet removes rows from #ToDeleteRepeated,
-- so once this condition is true it stays true.
WHILE ((SELECT COUNT(1) FROM #ToDeleteRepeated) > 0)
BEGIN
-- Pick one duplicated code (TOP 1 without ORDER BY: an arbitrary one).
SELECT TOP 1 @Code = FriendlyFunctionCode FROM #ToDeleteRepeated
-- NOTE(review): this inner loop runs until no row is left for @Code, so it
-- also deletes the one row that was supposed to be kept.
WHILE ((SELECT COUNT(1) FROM [FirmFunction] WHERE FriendlyFunctionCode = @Code) > 0)
BEGIN
-- Pick one description for the code, then delete every row with that code and description.
-- NOTE(review): presumably FunctionLevel3Desc is never NULL; a NULL value
-- would not match "= @Desc" and the row could not be deleted - confirm.
SELECT TOP 1 @Desc = FunctionLevel3Desc FROM [FirmFunction] WHERE FriendlyFunctionCode = @Code
DELETE FROM [FirmFunction] WHERE FriendlyFunctionCode = @Code AND FunctionLevel3Desc = @Desc
END
END
有什么建议吗?
您可以像这样使用窗口函数,不必使用游标(游标在 SQL Server 中性能不佳)。您可以单独运行内层的 SELECT,查看它是如何分配行号的。
测试数据
-- Test fixture: a temp table seeded with the three sample rows from the
-- question (code 1 appears twice, code 2 once).
CREATE TABLE #TestData
(
     FriendlyFunctionCode int
    ,MemberFirmId         nvarchar(10)
    ,FunctionLevel3Desc   nvarchar(10)
);

-- Explicit column list so the insert does not depend on the table's column order.
INSERT INTO #TestData (FriendlyFunctionCode, MemberFirmId, FunctionLevel3Desc)
VALUES
     (1, 'Value1', 'Value2')
    ,(1, 'Value2', 'Value3')
    ,(2, 'Value4', 'Value5');
查询
-- Keep one row per FriendlyFunctionCode: number the rows inside each code,
-- stash the rows numbered 1 in #SavedData, then reload #TestData from it.
;WITH NumberedRows AS
(
    SELECT
         src.FriendlyFunctionCode
        ,src.MemberFirmId
        ,src.FunctionLevel3Desc
         -- The ORDER BY repeats the partition key, so the order inside a
         -- partition is unspecified and the row numbered 1 is arbitrary.
        ,ROW_NUMBER() OVER (PARTITION BY src.FriendlyFunctionCode
                            ORDER BY src.FriendlyFunctionCode) AS RowNum
    FROM #TestData AS src
)
SELECT
     kept.FriendlyFunctionCode
    ,kept.MemberFirmId
    ,kept.FunctionLevel3Desc
INTO #SavedData
FROM NumberedRows AS kept
WHERE kept.RowNum = 1;

-- Empty the table and put back only the surviving rows.
TRUNCATE TABLE #TestData;

INSERT INTO #TestData (FriendlyFunctionCode, MemberFirmId, FunctionLevel3Desc)
SELECT
     saved.FriendlyFunctionCode
    ,saved.MemberFirmId
    ,saved.FunctionLevel3Desc
FROM #SavedData AS saved;

-- The staging copy is no longer needed.
DROP TABLE #SavedData;
结果
FriendlyFunctionCode MemberFirmId FunctionLevel3Desc
1 Value1 Value2
2 Value4 Value5
-- Delete every duplicate in one set-based statement: number the rows within
-- each FriendlyFunctionCode and remove all rows numbered above 1.
-- The leading semicolon terminates any preceding statement, which T-SQL
-- requires before the WITH keyword of a common table expression.
;WITH CTE AS
(
    SELECT
        FriendlyFunctionCode,
        -- The ORDER BY repeats the partition key, so the numbering inside a
        -- group is arbitrary and the surviving row is not deterministic.
        ROW_NUMBER() OVER (PARTITION BY FriendlyFunctionCode
                           ORDER BY FriendlyFunctionCode) AS RN
    FROM [dbo].[FirmFunction]
)
DELETE FROM CTE
WHERE RN > 1;
您也可以只按 FriendlyFunctionCode 分组(GROUP BY),并对其余各列使用 MAX。
-- Collapse FirmFunction to one row per FriendlyFunctionCode and stage the
-- result in the temp table #StagingTable.
-- NOTE: each MAX() is evaluated independently, so the staged MemberFirmId and
-- FunctionLevel3Desc of a code may come from two different source rows.
SELECT
    FriendlyFunctionCode,
    MAX(MemberFirmId)       AS MemberFirmId,
    MAX(FunctionLevel3Desc) AS FunctionLevel3Desc
INTO #StagingTable
FROM FirmFunction
GROUP BY FriendlyFunctionCode;
然后截断(TRUNCATE)原来的 table,再把上面 SELECT 的结果插回去……或者干脆新建一个 table,把这些去重后(取最大值)的记录插入其中。
-- Replace the contents of FirmFunction with the de-duplicated rows staged in
-- #StagingTable. Both steps run inside one transaction so that a failed
-- reload rolls the TRUNCATE back instead of leaving the table empty.
-- XACT_ABORT ON makes a run-time error roll back the whole transaction; the
-- setting stays in effect for the rest of the session.
SET XACT_ABORT ON;

BEGIN TRANSACTION;

TRUNCATE TABLE FirmFunction;

-- Columns are matched by position: #StagingTable holds the code, the member
-- firm id and the description, in that order.
INSERT INTO FirmFunction (FriendlyFunctionCode, MemberFirmId, FunctionLevel3Desc)
SELECT *
FROM #StagingTable;

COMMIT TRANSACTION;
这不如创建一个 table FirmFunction2 安全,例如使用与原始架构相同的架构,然后插入其中,然后重命名....
-- Safer variant: build the de-duplicated copy in a new table and leave the
-- original untouched until the result has been checked.

-- WHERE 1 = 0 selects no rows, so this only creates FirmFunction2 with the
-- same columns as FirmFunction. SELECT ... INTO does not copy indexes or
-- constraints; recreate those separately if they are needed.
SELECT *
INTO FirmFunction2
FROM FirmFunction
WHERE 1 = 0;

-- Load one row per FriendlyFunctionCode straight into the new table.
-- NOTE: each MAX() is evaluated independently, so the two values of a code
-- may come from two different source rows.
INSERT INTO FirmFunction2 (FriendlyFunctionCode, MemberFirmId, FunctionLevel3Desc)
SELECT
    FriendlyFunctionCode,
    MAX(MemberFirmId)       AS MemberFirmId,
    MAX(FunctionLevel3Desc) AS FunctionLevel3Desc
FROM FirmFunction
GROUP BY FriendlyFunctionCode;
然后您可以检查 FirmFunction2 中的数据,如果满意……就删除原来的 table,再把 FirmFunction2 重命名为原来的名称。
通过 row_number()
使用 CTE 删除
-- Number the rows of each friendlyfunctioncode in memberfirmid order and
-- delete everything after the first, keeping the row that sorts first.
;WITH cte AS
(
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY friendlyfunctioncode
                           ORDER BY memberfirmid) AS rn
    FROM deletingtable
)
DELETE FROM cte
WHERE rn > 1;
这将按照以下执行计划执行:
Table/Clustered Index Scan(表/聚集索引扫描)--> Sort(排序,没有合适的索引时才需要)--> Segment(分段)--> Sequence Project(序列投影)--> Filter(筛选),然后 Delete(删除)。
如果 FriendlyFunctionCode 上有合适的索引,就可以省去排序,只需一次扫描,执行得更快。