SQL Server:对 table 变量使用 ROW_NUMBER() OVER 来修改重复的列值
SQL Server : ROW_NUMBER() OVER on table variable to amend duplicate columns
假设我有 table 变量这样声明...
-- Table variable that stages the (id, name) pairs before they are copied on.
DECLARE @LocalTable TABLE (
    IdField   NVARCHAR(MAX),
    NameField NVARCHAR(MAX)
);
然后我像这样填充它...
-- Populate the table variable from the source table.
-- Target columns are listed explicitly so the statement does not depend on
-- the physical column order of @LocalTable.
INSERT INTO @LocalTable (IdField, NameField)
SELECT
    IdColumn,
    NameColumn
FROM SourceTable;
源 table 中的 NameColumn
可能具有重复值,因此本地 table 中的 NameField
将具有相同的重复值。
假设我想像这样将本地 table 插入目标 table...
-- Copy every staged row from the table variable into the target table.
INSERT INTO TargetTable (NewIdColumn, NewNameColumn)
SELECT lt.IdField, lt.NameField
FROM @LocalTable AS lt;
但是:TargetTable
中的 NewNameColumn
有一个 UNIQUE
约束,因此重复会导致异常。
我想应用这个例子,
ROW_NUMBER() OVER(PARTITION BY NameField ORDER BY NameField)
这样 NameField
就是 appended/suffixed 并且有一个数字表示它的重复。
我有这个可以 select 正确附加值的工作示例,但我无法让它在这样的更新语句中工作:
-- Non-working attempt: AppendedNameField is not produced by anything in this
-- UPDATE's FROM clause, which lists only @LocalTable AS localtable.
UPDATE localtable
SET NameField = AppendedNameField
FROM @LocalTable AS localtable
-- Nothing joins the SELECT below to the UPDATE above, so it is a separate
-- statement; its CONCAT(...) column also carries no alias.
SELECT
CONCAT(Ref.NameField, ROW_NUMBER() OVER (PARTITION BY Ref.NameField
ORDER BY Source.IdField)), *
FROM
@LocalTable AS Source
INNER JOIN
@LocalTable AS Ref ON Ref.NameField = Source.NameField
AND Ref.IdField != Source.IdField
提前致谢。
只需在 table 变量中添加一个标识(IDENTITY)列。
-- Table variable with an IDENTITY column (ix) that records insertion order.
DECLARE @LocalTable TABLE
(
    ix INT IDENTITY PRIMARY KEY,
    IdField NVARCHAR(MAX),
    NameField NVARCHAR(MAX)
);

-- The column list must name the table variable's own columns
-- (IdField / NameField); IdColumn / NameColumn exist only on SourceTable.
INSERT INTO @LocalTable (IdField, NameField)
SELECT
    IdColumn,
    NameColumn
FROM SourceTable
-- Make sure same names are consecutive in the table
ORDER BY NameColumn;
像这样设置 NameField:
-- Suffix each duplicated name with its 1-based position inside its name group.
-- The position is this row's ix minus the lowest ix for the same name, plus 1,
-- so it assumes rows sharing a name have consecutive ix values.
UPDATE lt
SET NameField = lt.NameField
    + CAST(
        lt.ix
        - (SELECT MIN(lt2.ix)
           FROM @LocalTable AS lt2
           WHERE lt2.NameField = lt.NameField)
        + 1
      AS NVARCHAR(10))
FROM @LocalTable AS lt
-- Only do those with duplicated names
WHERE lt.NameField IN (
    SELECT NameField
    FROM @LocalTable
    GROUP BY NameField
    HAVING COUNT(*) > 1
);
如果我明白你要做什么。
-- Append a per-name sequence number (ordered by IdField) to every NameField.
-- The UPDATE targets the CTE, which writes through to @LocalTable.
-- There is deliberately no WHERE clause: every row is renamed, including names
-- that occur only once.
WITH NumberedNames AS
(
    SELECT
        NameField,
        CONCAT(
            NameField,
            ROW_NUMBER() OVER (PARTITION BY NameField ORDER BY IdField)
        ) AS NewName
    FROM @LocalTable
)
UPDATE NumberedNames
SET NameField = NewName;
如果您只想对重复的名称执行此操作,您可以将 COUNT(*) OVER (PARTITION BY NameField)
添加到 CTE 中,并在条件逻辑中使用它。
不一定要更新table,插入的时候加个后缀不行吗?
-- Sample data: one unique name and one name that occurs twice.
DECLARE @LocalTable TABLE (IdField INT, NameField VARCHAR(50));
INSERT INTO @LocalTable (IdField, NameField)
VALUES (1, 'Not Duplicate'), (2, 'Duplicate'), (3, 'Duplicate');

-- Insert straight into the target, suffixing only names that occur more than
-- once with their per-name sequence number (ordered by IdField).
INSERT INTO TargetTable (NewIdColumn, NewNameColumn)
SELECT
    IdField,
    CONCAT(
        NameField,
        CASE
            WHEN COUNT(*) OVER (PARTITION BY NameField) > 1
                -- Cast to a string so both CASE branches are character data.
                -- With a bare ROW_NUMBER() (BIGINT) the CASE result type is
                -- BIGINT and the '' branch is converted to 0, which would
                -- append "0" to names that are not duplicated.
                THEN CAST(ROW_NUMBER() OVER (PARTITION BY NameField ORDER BY IdField) AS VARCHAR(20))
            ELSE ''
        END
    )
FROM @LocalTable
ORDER BY IdField;
或者,您可以通过简单地将上述 select 包装在子查询中并更新它来进行更新:
-- Sample data: one unique name and one name that occurs twice.
DECLARE @LocalTable TABLE (IdField INT, NameField VARCHAR(50));
INSERT INTO @LocalTable (IdField, NameField)
VALUES (1, 'Not Duplicate'), (2, 'Duplicate'), (3, 'Duplicate');

-- Update through a derived table that computes the new name for every row.
-- Names that occur only once get an empty suffix and so keep their value.
UPDATE t
SET NameField = NewNameField
FROM
(
    SELECT
        IdField,
        NameField,
        NewNameField = CONCAT(
            NameField,
            CASE
                WHEN COUNT(*) OVER (PARTITION BY NameField) > 1
                    -- Cast to a string so both CASE branches are character
                    -- data. With a bare ROW_NUMBER() (BIGINT) the '' branch
                    -- is converted to 0 and unique names would get a "0".
                    THEN CAST(ROW_NUMBER() OVER (PARTITION BY NameField ORDER BY IdField) AS VARCHAR(20))
                ELSE ''
            END
        )
    FROM @LocalTable
) AS t;

SELECT IdField, NameField FROM @LocalTable;
首先,考虑使用临时表(temp table)而不是 table 变量。
其次,尝试将 NVARCHAR(MAX) 更改为更小的数据类型,例如 INT
以下是在 NameField 中仅包含唯一值的代码:
-- Temp table holding the staged rows. IdField is INT so that MIN(IdField)
-- below compares numerically; as NVARCHAR it would compare as text
-- ('10' sorts before '9').
CREATE TABLE #LocalTable
(
    IdField INT,
    NameField NVARCHAR(MAX)
);

INSERT INTO #LocalTable (IdField, NameField)
VALUES (1,'A'), (2,'B'), (3,'B');

-- Insert one row per distinct NameField: the row with the lowest IdField.
INSERT INTO TargetTable
(
    NewIdColumn,
    NewNameColumn
)
SELECT IdField, NameField
FROM #LocalTable
WHERE IdField IN (
    SELECT MIN(IdField)
    FROM #LocalTable
    GROUP BY NameField
);
请注意:NameField 重复的记录中,只有 IdField 最小的那一条会被插入目标 table,其余重复记录将被忽略。
尝试使用以下代码:
-- Number the rows of each name group (ordered by IdField) and count the size
-- of the group, then append the number to names that occur more than once.
-- The UPDATE targets the CTE, which writes through to @LocalTable.
WITH NumberedNames AS (
    SELECT
        NameField,
        ROW_NUMBER() OVER (PARTITION BY NameField ORDER BY IdField) AS UniqueID,
        COUNT(*) OVER (PARTITION BY NameField) AS NameCount
    FROM @LocalTable
)
UPDATE NumberedNames
SET NameField = CONCAT(NameField, UniqueID)
-- Leave names that occur only once untouched.
WHERE NameCount > 1;
假设我有 table 变量这样声明...
-- Table variable that stages the (id, name) pairs before they are copied on.
DECLARE @LocalTable TABLE (
    IdField   NVARCHAR(MAX),
    NameField NVARCHAR(MAX)
);
然后我像这样填充它...
-- Populate the table variable from the source table.
-- Target columns are listed explicitly so the statement does not depend on
-- the physical column order of @LocalTable.
INSERT INTO @LocalTable (IdField, NameField)
SELECT
    IdColumn,
    NameColumn
FROM SourceTable;
源 table 中的 NameColumn
可能具有重复值,因此本地 table 中的 NameField
将具有相同的重复值。
假设我想像这样将本地 table 插入目标 table...
-- Copy every staged row from the table variable into the target table.
INSERT INTO TargetTable (NewIdColumn, NewNameColumn)
SELECT lt.IdField, lt.NameField
FROM @LocalTable AS lt;
但是:TargetTable
中的 NewNameColumn
有一个 UNIQUE
约束,因此重复会导致异常。
我想应用这个例子,
ROW_NUMBER() OVER(PARTITION BY NameField ORDER BY NameField)
这样 NameField
就是 appended/suffixed 并且有一个数字表示它的重复。
我有这个可以 select 正确附加值的工作示例,但我无法让它在这样的更新语句中工作:
-- Non-working attempt: AppendedNameField is not produced by anything in this
-- UPDATE's FROM clause, which lists only @LocalTable AS localtable.
UPDATE localtable
SET NameField = AppendedNameField
FROM @LocalTable AS localtable
-- Nothing joins the SELECT below to the UPDATE above, so it is a separate
-- statement; its CONCAT(...) column also carries no alias.
SELECT
CONCAT(Ref.NameField, ROW_NUMBER() OVER (PARTITION BY Ref.NameField
ORDER BY Source.IdField)), *
FROM
@LocalTable AS Source
INNER JOIN
@LocalTable AS Ref ON Ref.NameField = Source.NameField
AND Ref.IdField != Source.IdField
提前致谢。
只需在 table 变量中添加一个标识(IDENTITY)列。
-- Table variable with an IDENTITY column (ix) that records insertion order.
DECLARE @LocalTable TABLE
(
    ix INT IDENTITY PRIMARY KEY,
    IdField NVARCHAR(MAX),
    NameField NVARCHAR(MAX)
);

-- The column list must name the table variable's own columns
-- (IdField / NameField); IdColumn / NameColumn exist only on SourceTable.
INSERT INTO @LocalTable (IdField, NameField)
SELECT
    IdColumn,
    NameColumn
FROM SourceTable
-- Make sure same names are consecutive in the table
ORDER BY NameColumn;
像这样设置 NameField:
-- Suffix each duplicated name with its 1-based position inside its name group.
-- The position is this row's ix minus the lowest ix for the same name, plus 1,
-- so it assumes rows sharing a name have consecutive ix values.
UPDATE lt
SET NameField = lt.NameField
    + CAST(
        lt.ix
        - (SELECT MIN(lt2.ix)
           FROM @LocalTable AS lt2
           WHERE lt2.NameField = lt.NameField)
        + 1
      AS NVARCHAR(10))
FROM @LocalTable AS lt
-- Only do those with duplicated names
WHERE lt.NameField IN (
    SELECT NameField
    FROM @LocalTable
    GROUP BY NameField
    HAVING COUNT(*) > 1
);
如果我明白你要做什么。
-- Append a per-name sequence number (ordered by IdField) to every NameField.
-- The UPDATE targets the CTE, which writes through to @LocalTable.
-- There is deliberately no WHERE clause: every row is renamed, including names
-- that occur only once.
WITH NumberedNames AS
(
    SELECT
        NameField,
        CONCAT(
            NameField,
            ROW_NUMBER() OVER (PARTITION BY NameField ORDER BY IdField)
        ) AS NewName
    FROM @LocalTable
)
UPDATE NumberedNames
SET NameField = NewName;
如果您只想对重复的名称执行此操作,您可以将 COUNT(*) OVER (PARTITION BY NameField)
添加到 CTE 中,并在条件逻辑中使用它。
不一定要更新table,插入的时候加个后缀不行吗?
-- Sample data: one unique name and one name that occurs twice.
DECLARE @LocalTable TABLE (IdField INT, NameField VARCHAR(50));
INSERT INTO @LocalTable (IdField, NameField)
VALUES (1, 'Not Duplicate'), (2, 'Duplicate'), (3, 'Duplicate');

-- Insert straight into the target, suffixing only names that occur more than
-- once with their per-name sequence number (ordered by IdField).
INSERT INTO TargetTable (NewIdColumn, NewNameColumn)
SELECT
    IdField,
    CONCAT(
        NameField,
        CASE
            WHEN COUNT(*) OVER (PARTITION BY NameField) > 1
                -- Cast to a string so both CASE branches are character data.
                -- With a bare ROW_NUMBER() (BIGINT) the CASE result type is
                -- BIGINT and the '' branch is converted to 0, which would
                -- append "0" to names that are not duplicated.
                THEN CAST(ROW_NUMBER() OVER (PARTITION BY NameField ORDER BY IdField) AS VARCHAR(20))
            ELSE ''
        END
    )
FROM @LocalTable
ORDER BY IdField;
或者,您可以通过简单地将上述 select 包装在子查询中并更新它来进行更新:
-- Sample data: one unique name and one name that occurs twice.
DECLARE @LocalTable TABLE (IdField INT, NameField VARCHAR(50));
INSERT INTO @LocalTable (IdField, NameField)
VALUES (1, 'Not Duplicate'), (2, 'Duplicate'), (3, 'Duplicate');

-- Update through a derived table that computes the new name for every row.
-- Names that occur only once get an empty suffix and so keep their value.
UPDATE t
SET NameField = NewNameField
FROM
(
    SELECT
        IdField,
        NameField,
        NewNameField = CONCAT(
            NameField,
            CASE
                WHEN COUNT(*) OVER (PARTITION BY NameField) > 1
                    -- Cast to a string so both CASE branches are character
                    -- data. With a bare ROW_NUMBER() (BIGINT) the '' branch
                    -- is converted to 0 and unique names would get a "0".
                    THEN CAST(ROW_NUMBER() OVER (PARTITION BY NameField ORDER BY IdField) AS VARCHAR(20))
                ELSE ''
            END
        )
    FROM @LocalTable
) AS t;

SELECT IdField, NameField FROM @LocalTable;
首先,考虑使用临时表(temp table)而不是 table 变量。
其次,尝试将 NVARCHAR(MAX) 更改为更小的数据类型,例如 INT
以下是在 NameField 中仅包含唯一值的代码:
-- Temp table holding the staged rows. IdField is INT so that MIN(IdField)
-- below compares numerically; as NVARCHAR it would compare as text
-- ('10' sorts before '9').
CREATE TABLE #LocalTable
(
    IdField INT,
    NameField NVARCHAR(MAX)
);

INSERT INTO #LocalTable (IdField, NameField)
VALUES (1,'A'), (2,'B'), (3,'B');

-- Insert one row per distinct NameField: the row with the lowest IdField.
INSERT INTO TargetTable
(
    NewIdColumn,
    NewNameColumn
)
SELECT IdField, NameField
FROM #LocalTable
WHERE IdField IN (
    SELECT MIN(IdField)
    FROM #LocalTable
    GROUP BY NameField
);
请注意:NameField 重复的记录中,只有 IdField 最小的那一条会被插入目标 table,其余重复记录将被忽略。
尝试使用以下代码:
-- Number the rows of each name group (ordered by IdField) and count the size
-- of the group, then append the number to names that occur more than once.
-- The UPDATE targets the CTE, which writes through to @LocalTable.
WITH NumberedNames AS (
    SELECT
        NameField,
        ROW_NUMBER() OVER (PARTITION BY NameField ORDER BY IdField) AS UniqueID,
        COUNT(*) OVER (PARTITION BY NameField) AS NameCount
    FROM @LocalTable
)
UPDATE NumberedNames
SET NameField = CONCAT(NameField, UniqueID)
-- Leave names that occur only once untouched.
WHERE NameCount > 1;