如何显示重复项的所有 RowNumber 记录?
Show all RowNumber records for duplicates?
我有一张存放字符串资源的表:
-- Sample data: seven (id, name, age) rows exposed as the CTE "cte".
-- Column names are declared once in the CTE header instead of on every SELECT.
-- Note that row 5's name is 'paul ' with a trailing space.
;WITH cte (id, name, age) AS
(
    SELECT 1, 'john', 10
    UNION
    SELECT 2, 'john', 10
    UNION
    SELECT 3, 'john', 12
    UNION
    SELECT 4, 'paul', 6
    UNION
    SELECT 5, 'paul ', 6
    UNION
    SELECT 6, 'paul different', 7
    UNION
    SELECT 7, 'ringo', 2
)
所以名字“john”的年龄是 10 岁。
后来,其他人(不是我) 也添加了 10 岁的“john”。
所以我想清除所有重复项。
但这不是问题所在。在删除之前,我想查看所有重复项。
所以我这样做了:
-- Lists only the surplus copies of each (name, age) group: rows are numbered
-- by id within their group and the first row of every group is filtered out.
SELECT numbered.ID,
       numbered.name,
       numbered.age,
       numbered.rn
FROM (
    SELECT ID,
           name,
           age,
           ROW_NUMBER() OVER (PARTITION BY name, age ORDER BY id) AS rn
    FROM cte
) AS numbered
WHERE numbered.rn >= 2
ORDER BY numbered.name,
         numbered.age,
         numbered.rn
结果:
这基本上显示了重复项。但我还想同时看到 rn=1 的那一行,前提是该值存在多个版本(即确实有重复)。
问题
换句话说:我怎样才能增强我的查询:
- 仅当某条记录存在多个版本时,才显示该记录的所有版本(所有行号 rn)
想要的结果:
ID name age rn
1 john 10 1
2 john 10 2
4 paul 6 1
5 paul 6 2
注意:我知道可以通过再次扫描该表、按相同的 name 和 age 匹配来实现。但我想知道是否有更优雅的方法。
使用 EXISTS 运算符查找重复的名称。试试这个:
-- Returns every row of each (name, age) group that contains more than one row,
-- with rn numbering the rows by id inside their group.
;WITH cte (id, name, age) AS
(
    -- Sample data; row 5's name is 'paul ' with a trailing space.
    SELECT 1, 'john', 10
    UNION
    SELECT 2, 'john', 10
    UNION
    SELECT 3, 'john', 12
    UNION
    SELECT 4, 'paul', 6
    UNION
    SELECT 5, 'paul ', 6
    UNION
    SELECT 6, 'paul different', 7
    UNION
    SELECT 7, 'ringo', 2
),
cte1 AS
(
    SELECT ID,
           name,
           age,
           ROW_NUMBER() OVER (PARTITION BY name, age ORDER BY id) AS rn
    FROM cte
)
SELECT cur.ID,
       cur.name,
       cur.age,
       cur.rn
FROM cte1 AS cur
WHERE EXISTS (
          -- A row with rn > 1 in the same group means the group has duplicates.
          SELECT 1
          FROM cte1 AS dup
          WHERE dup.name = cur.name
            AND dup.age = cur.age
            AND dup.rn > 1
      )
ORDER BY cur.name, cur.age, cur.rn
或者使用 INNER JOIN(沿用上面定义的 cte1):
-- INNER JOIN alternative: a final SELECT to place after CTE definitions that
-- provide cte1 with name, age and rn = ROW_NUMBER() per (name, age) group.
--
-- The join matches on b.rn = 2 rather than b.rn > 1. Every group with
-- duplicates has exactly one row numbered 2, so each row of the group is
-- returned once. With b.rn > 1, a group of three or more rows would return
-- each of its rows once per extra copy.
-- rn is included in the output so the result has the ID / name / age / rn shape.
SELECT a.id,
       a.name,
       a.age,
       a.rn
FROM cte1 AS a
INNER JOIN cte1 AS b
    ON a.name = b.name
   AND a.age = b.age
   AND b.rn = 2
ORDER BY a.name, a.age, a.rn
或者,若想只扫描一次表,可以使用 Dense_Rank 加上窗口聚合函数(window function):
-- Single-pass variant: returns every row of each (name, age) group that holds
-- more than one row, using window functions only (no self-join, no EXISTS).
;WITH cte AS
(
    -- Sample data. The ids are all different, so the rows are disjoint and
    -- UNION ALL yields the same set as UNION without the de-duplication step.
    -- Row 5's name is 'paul ' with a trailing space.
    SELECT 1 AS id, 'john' AS name, 10 AS age
    UNION ALL
    SELECT 2 AS id, 'john' AS name, 10 AS age
    UNION ALL
    SELECT 3 AS id, 'john' AS name, 12 AS age
    UNION ALL
    SELECT 4 AS id, 'paul' AS name, 6 AS age
    UNION ALL
    SELECT 5 AS id, 'paul ' AS name, 6 AS age
    UNION ALL
    SELECT 6 AS id, 'paul different' AS name, 7 AS age
    UNION ALL
    SELECT 7 AS id, 'ringo' AS name, 2 AS age
)
, cte1
AS (SELECT ID,
           name,
           age,
           -- Group size. COUNT(*) counts rows; COUNT(age) would skip NULL ages.
           COUNT(*) OVER (PARTITION BY name, age) AS cnt,
           -- Position inside the (name, age) group, ordered by id.
           -- ROW_NUMBER over (name, age) is used instead of
           -- DENSE_RANK() OVER (PARTITION BY name ORDER BY age): filtering that
           -- rank on rn = 1 kept only each name's lowest age, dropping duplicates
           -- at any other age, and left rn = 1 on every returned row.
           ROW_NUMBER() OVER (PARTITION BY name, age ORDER BY id) AS rn
    FROM cte)
SELECT ID,
       name,
       age,
       cnt,
       rn
FROM cte1
WHERE cnt > 1          -- keep only groups that actually have duplicates
ORDER BY name, age, rn
我有一张存放字符串资源的表:
-- Sample data: seven (id, name, age) rows exposed as the CTE "cte".
-- Column names are declared once in the CTE header instead of on every SELECT.
-- Note that row 5's name is 'paul ' with a trailing space.
;WITH cte (id, name, age) AS
(
    SELECT 1, 'john', 10
    UNION
    SELECT 2, 'john', 10
    UNION
    SELECT 3, 'john', 12
    UNION
    SELECT 4, 'paul', 6
    UNION
    SELECT 5, 'paul ', 6
    UNION
    SELECT 6, 'paul different', 7
    UNION
    SELECT 7, 'ringo', 2
)
所以名字“john”的年龄是 10 岁。
后来,其他人(不是我) 也添加了 10 岁的“john”。
所以我想清除所有重复项。
但这不是问题所在。在删除之前,我想查看所有重复项。
所以我这样做了:
-- Lists only the surplus copies of each (name, age) group: rows are numbered
-- by id within their group and the first row of every group is filtered out.
SELECT numbered.ID,
       numbered.name,
       numbered.age,
       numbered.rn
FROM (
    SELECT ID,
           name,
           age,
           ROW_NUMBER() OVER (PARTITION BY name, age ORDER BY id) AS rn
    FROM cte
) AS numbered
WHERE numbered.rn >= 2
ORDER BY numbered.name,
         numbered.age,
         numbered.rn
结果:
这基本上显示了重复项。但我还想同时看到 rn=1 的那一行,前提是该值存在多个版本(即确实有重复)。
问题
换句话说:我怎样才能增强我的查询:
- 仅当某条记录存在多个版本时,才显示该记录的所有版本(所有行号 rn)
想要的结果:
ID name age rn
1 john 10 1
2 john 10 2
4 paul 6 1
5 paul 6 2
注意:我知道可以通过再次扫描该表、按相同的 name 和 age 匹配来实现。但我想知道是否有更优雅的方法。
使用 EXISTS 运算符查找重复的名称。试试这个:
-- Returns every row of each (name, age) group that contains more than one row,
-- with rn numbering the rows by id inside their group.
;WITH cte (id, name, age) AS
(
    -- Sample data; row 5's name is 'paul ' with a trailing space.
    SELECT 1, 'john', 10
    UNION
    SELECT 2, 'john', 10
    UNION
    SELECT 3, 'john', 12
    UNION
    SELECT 4, 'paul', 6
    UNION
    SELECT 5, 'paul ', 6
    UNION
    SELECT 6, 'paul different', 7
    UNION
    SELECT 7, 'ringo', 2
),
cte1 AS
(
    SELECT ID,
           name,
           age,
           ROW_NUMBER() OVER (PARTITION BY name, age ORDER BY id) AS rn
    FROM cte
)
SELECT cur.ID,
       cur.name,
       cur.age,
       cur.rn
FROM cte1 AS cur
WHERE EXISTS (
          -- A row with rn > 1 in the same group means the group has duplicates.
          SELECT 1
          FROM cte1 AS dup
          WHERE dup.name = cur.name
            AND dup.age = cur.age
            AND dup.rn > 1
      )
ORDER BY cur.name, cur.age, cur.rn
或者使用 INNER JOIN(沿用上面定义的 cte1):
-- INNER JOIN alternative: a final SELECT to place after CTE definitions that
-- provide cte1 with name, age and rn = ROW_NUMBER() per (name, age) group.
--
-- The join matches on b.rn = 2 rather than b.rn > 1. Every group with
-- duplicates has exactly one row numbered 2, so each row of the group is
-- returned once. With b.rn > 1, a group of three or more rows would return
-- each of its rows once per extra copy.
-- rn is included in the output so the result has the ID / name / age / rn shape.
SELECT a.id,
       a.name,
       a.age,
       a.rn
FROM cte1 AS a
INNER JOIN cte1 AS b
    ON a.name = b.name
   AND a.age = b.age
   AND b.rn = 2
ORDER BY a.name, a.age, a.rn
或者,若想只扫描一次表,可以使用 Dense_Rank 加上窗口聚合函数(window function):
-- Single-pass variant: returns every row of each (name, age) group that holds
-- more than one row, using window functions only (no self-join, no EXISTS).
;WITH cte AS
(
    -- Sample data. The ids are all different, so the rows are disjoint and
    -- UNION ALL yields the same set as UNION without the de-duplication step.
    -- Row 5's name is 'paul ' with a trailing space.
    SELECT 1 AS id, 'john' AS name, 10 AS age
    UNION ALL
    SELECT 2 AS id, 'john' AS name, 10 AS age
    UNION ALL
    SELECT 3 AS id, 'john' AS name, 12 AS age
    UNION ALL
    SELECT 4 AS id, 'paul' AS name, 6 AS age
    UNION ALL
    SELECT 5 AS id, 'paul ' AS name, 6 AS age
    UNION ALL
    SELECT 6 AS id, 'paul different' AS name, 7 AS age
    UNION ALL
    SELECT 7 AS id, 'ringo' AS name, 2 AS age
)
, cte1
AS (SELECT ID,
           name,
           age,
           -- Group size. COUNT(*) counts rows; COUNT(age) would skip NULL ages.
           COUNT(*) OVER (PARTITION BY name, age) AS cnt,
           -- Position inside the (name, age) group, ordered by id.
           -- ROW_NUMBER over (name, age) is used instead of
           -- DENSE_RANK() OVER (PARTITION BY name ORDER BY age): filtering that
           -- rank on rn = 1 kept only each name's lowest age, dropping duplicates
           -- at any other age, and left rn = 1 on every returned row.
           ROW_NUMBER() OVER (PARTITION BY name, age ORDER BY id) AS rn
    FROM cte)
SELECT ID,
       name,
       age,
       cnt,
       rn
FROM cte1
WHERE cnt > 1          -- keep only groups that actually have duplicates
ORDER BY name, age, rn