从 Oracle 中删除行 ID 最小的重复项
Remove duplicates with least row ids from Oracle
我有一个数据库 table 看起来像
ID | Book_no | Book_name | Book_category |
---|---|---|---|
ID1 | 1 | B1 | CB1 |
ID1 | 2 | B1 | CB1 |
ID1 | 3 | B2 | CB1 |
ID1 | 4 | B2 | CB1 |
ID1 | 5 | B3 | CB1 |
ID2 | 1 | B1 | CB2 |
ID2 | 2 | B1 | CB2 |
ID2 | 3 | B2 | CB2 |
而预期的结果就像
ID | Book_No | Book_name | Book_category |
---|---|---|---|
ID1 | 2 | B1 | CB1 |
ID1 | 4 | B2 | CB1 |
ID1 | 5 | B3 | CB1 |
ID2 | 2 | B1 | CB2 |
ID2 | 3 | B2 | CB2 |
我想根据 `ID`、`Book_name` 和 `Book_category` 删除表中的重复记录。下面的查询删除了重复的记录,但结果不是预期的,因为我想删除除最高 `Book_no` 之外的所有重复记录,即保留 `Book_no` 最高的那一条并删除其余重复项。
您可以使用 `lead()`,并筛选出名称发生变化的位置:
-- Within each (id, book_category) group ordered by book_no, fetch the
-- book_name of the following row, then keep a row only when it is the last
-- row of its group or the following row carries a different book_name.
-- NOTE(review): only adjacent rows are compared, so this presumes rows that
-- share a book_name are consecutive in book_no order -- confirm for real data.
WITH with_next AS (
    SELECT
        src.*,
        LEAD(src.book_name) OVER (
            PARTITION BY src.id, src.book_category
            ORDER BY src.book_no
        ) AS next_book_name
    FROM t src
)
SELECT wn.*
FROM with_next wn
WHERE wn.next_book_name IS NULL
   OR wn.next_book_name <> wn.book_name;
假设您的 table 看起来像这样:
-- Sample data: create the books table directly from a literal row set.
-- Some (id, book_name, book_category) combinations appear twice with
-- different book_no values.
CREATE TABLE books (id, book_no, book_name, book_category) AS
    SELECT 'ID1', 1, 'B1', 'CB1' FROM DUAL UNION ALL
    SELECT 'ID1', 2, 'B1', 'CB1' FROM DUAL UNION ALL
    SELECT 'ID1', 3, 'B2', 'CB1' FROM DUAL UNION ALL
    SELECT 'ID1', 4, 'B2', 'CB1' FROM DUAL UNION ALL
    SELECT 'ID1', 5, 'B3', 'CB1' FROM DUAL UNION ALL
    SELECT 'ID2', 1, 'B1', 'CB2' FROM DUAL UNION ALL
    SELECT 'ID2', 2, 'B1', 'CB2' FROM DUAL UNION ALL
    SELECT 'ID2', 3, 'B2', 'CB2' FROM DUAL;
您可以使用 `delete` 语句,将每一行与您要保留的行进行比较。按其他列分组时,您想要保留的是那些具有 `max(book_no)` 的行。所以:
-- Remove every row that is not the highest book_no of its
-- (id, book_name, book_category) group.
DELETE FROM books
WHERE (id, book_no, book_name, book_category) NOT IN (
    -- One row per group: the row that must survive.
    SELECT
        keep.id,
        MAX(keep.book_no),
        keep.book_name,
        keep.book_category
    FROM books keep
    GROUP BY
        keep.id,
        keep.book_name,
        keep.book_category
);
这假设各列都是非空的;如果表中可能有 `null`,则需要更仔细地重写,使用 `not exists` 条件而不是 `not in`,但思路是一样的。
您可以通过关联 `ROWID` 伪列来执行 `DELETE`:
-- Delete by ROWID: number the rows of each (id, book_name, book_category)
-- group from highest to lowest book_no and remove every row numbered
-- after the first.
DELETE FROM table_name
WHERE ROWID IN (
    SELECT ranked.row_address
    FROM (
        SELECT
            src.ROWID AS row_address,
            ROW_NUMBER() OVER (
                PARTITION BY src.id, src.book_name, src.book_category
                ORDER BY src.book_no DESC
            ) AS position_in_group
        FROM table_name src
    ) ranked
    WHERE ranked.position_in_group > 1
);
其中,对于示例数据:
-- Sample data for the ROWID-based delete: eight literal rows in which some
-- (id, book_name, book_category) combinations occur twice.
CREATE TABLE table_name (id, book_no, book_name, book_category) AS
              SELECT 'ID1', 1, 'B1', 'CB1' FROM DUAL
    UNION ALL SELECT 'ID1', 2, 'B1', 'CB1' FROM DUAL
    UNION ALL SELECT 'ID1', 3, 'B2', 'CB1' FROM DUAL
    UNION ALL SELECT 'ID1', 4, 'B2', 'CB1' FROM DUAL
    UNION ALL SELECT 'ID1', 5, 'B3', 'CB1' FROM DUAL
    UNION ALL SELECT 'ID2', 1, 'B1', 'CB2' FROM DUAL
    UNION ALL SELECT 'ID2', 2, 'B1', 'CB2' FROM DUAL
    UNION ALL SELECT 'ID2', 3, 'B2', 'CB2' FROM DUAL;
那么剩下的行是:
-- List the surviving rows. The explicit ORDER BY makes the listing
-- deterministic; without it the rows come back in no guaranteed order.
SELECT
    id,
    book_no,
    book_name,
    book_category
FROM table_name
ORDER BY id, book_no;
ID | BOOK_NO | BOOK_NAME | BOOK_CATEGORY |
---|---|---|---|
ID1 | 2 | B1 | CB1 |
ID1 | 4 | B2 | CB1 |
ID1 | 5 | B3 | CB1 |
ID2 | 2 | B1 | CB2 |
ID2 | 3 | B2 | CB2 |
sqlfiddle here
您可以使用 first_value 查找所需的 ID,也可以使用 delete 删除其他 ID。
-- For every (id, book_name, book_category) group, report the highest
-- book_no, i.e. the row to keep. FIRST_VALUE over a descending sort with a
-- frame spanning the whole partition yields that value on every row of the
-- group, and DISTINCT collapses each group to a single output row.
SELECT DISTINCT
    b.id,
    b.book_name,
    b.book_category,
    FIRST_VALUE(b.book_no) OVER (
        PARTITION BY b.id, b.book_name, b.book_category
        ORDER BY b.book_no DESC
        RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS highest_book_no
FROM books b;
-- Generic template: column_names_to_be_grouped stands for the columns that
-- identify a set of duplicates, column_name for the column whose highest
-- value should be kept, and tables for the table being queried.
SELECT DISTINCT
    column_names_to_be_grouped,
    FIRST_VALUE(column_name) OVER (
        PARTITION BY column_names_to_be_grouped
        ORDER BY column_name DESC
        RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS highest_value
FROM tables;
我有一个数据库 table 看起来像
ID | Book_no | Book_name | Book_category |
---|---|---|---|
ID1 | 1 | B1 | CB1 |
ID1 | 2 | B1 | CB1 |
ID1 | 3 | B2 | CB1 |
ID1 | 4 | B2 | CB1 |
ID1 | 5 | B3 | CB1 |
ID2 | 1 | B1 | CB2 |
ID2 | 2 | B1 | CB2 |
ID2 | 3 | B2 | CB2 |
而预期的结果就像
ID | Book_No | Book_name | Book_category |
---|---|---|---|
ID1 | 2 | B1 | CB1 |
ID1 | 4 | B2 | CB1 |
ID1 | 5 | B3 | CB1 |
ID2 | 2 | B1 | CB2 |
ID2 | 3 | B2 | CB2 |
我想根据 `ID`、`Book_name` 和 `Book_category` 删除表中的重复记录。下面的查询删除了重复的记录,但结果不是预期的,因为我想删除除最高 `Book_no` 之外的所有重复记录,即保留 `Book_no` 最高的那一条并删除其余重复项。
您可以使用 `lead()`,并筛选出名称发生变化的位置:
-- Within each (id, book_category) group ordered by book_no, fetch the
-- book_name of the following row, then keep a row only when it is the last
-- row of its group or the following row carries a different book_name.
-- NOTE(review): only adjacent rows are compared, so this presumes rows that
-- share a book_name are consecutive in book_no order -- confirm for real data.
WITH with_next AS (
    SELECT
        src.*,
        LEAD(src.book_name) OVER (
            PARTITION BY src.id, src.book_category
            ORDER BY src.book_no
        ) AS next_book_name
    FROM t src
)
SELECT wn.*
FROM with_next wn
WHERE wn.next_book_name IS NULL
   OR wn.next_book_name <> wn.book_name;
假设您的 table 看起来像这样:
-- Sample data: create the books table directly from a literal row set.
-- Some (id, book_name, book_category) combinations appear twice with
-- different book_no values.
CREATE TABLE books (id, book_no, book_name, book_category) AS
    SELECT 'ID1', 1, 'B1', 'CB1' FROM DUAL UNION ALL
    SELECT 'ID1', 2, 'B1', 'CB1' FROM DUAL UNION ALL
    SELECT 'ID1', 3, 'B2', 'CB1' FROM DUAL UNION ALL
    SELECT 'ID1', 4, 'B2', 'CB1' FROM DUAL UNION ALL
    SELECT 'ID1', 5, 'B3', 'CB1' FROM DUAL UNION ALL
    SELECT 'ID2', 1, 'B1', 'CB2' FROM DUAL UNION ALL
    SELECT 'ID2', 2, 'B1', 'CB2' FROM DUAL UNION ALL
    SELECT 'ID2', 3, 'B2', 'CB2' FROM DUAL;
您可以使用 `delete` 语句,将每一行与您要保留的行进行比较。按其他列分组时,您想要保留的是那些具有 `max(book_no)` 的行。所以:
-- Remove every row that is not the highest book_no of its
-- (id, book_name, book_category) group.
DELETE FROM books
WHERE (id, book_no, book_name, book_category) NOT IN (
    -- One row per group: the row that must survive.
    SELECT
        keep.id,
        MAX(keep.book_no),
        keep.book_name,
        keep.book_category
    FROM books keep
    GROUP BY
        keep.id,
        keep.book_name,
        keep.book_category
);
这假设各列都是非空的;如果表中可能有 `null`,则需要更仔细地重写,使用 `not exists` 条件而不是 `not in`,但思路是一样的。
您可以通过关联 `ROWID` 伪列来执行 `DELETE`:
-- Delete by ROWID: number the rows of each (id, book_name, book_category)
-- group from highest to lowest book_no and remove every row numbered
-- after the first.
DELETE FROM table_name
WHERE ROWID IN (
    SELECT ranked.row_address
    FROM (
        SELECT
            src.ROWID AS row_address,
            ROW_NUMBER() OVER (
                PARTITION BY src.id, src.book_name, src.book_category
                ORDER BY src.book_no DESC
            ) AS position_in_group
        FROM table_name src
    ) ranked
    WHERE ranked.position_in_group > 1
);
其中,对于示例数据:
-- Sample data for the ROWID-based delete: eight literal rows in which some
-- (id, book_name, book_category) combinations occur twice.
CREATE TABLE table_name (id, book_no, book_name, book_category) AS
              SELECT 'ID1', 1, 'B1', 'CB1' FROM DUAL
    UNION ALL SELECT 'ID1', 2, 'B1', 'CB1' FROM DUAL
    UNION ALL SELECT 'ID1', 3, 'B2', 'CB1' FROM DUAL
    UNION ALL SELECT 'ID1', 4, 'B2', 'CB1' FROM DUAL
    UNION ALL SELECT 'ID1', 5, 'B3', 'CB1' FROM DUAL
    UNION ALL SELECT 'ID2', 1, 'B1', 'CB2' FROM DUAL
    UNION ALL SELECT 'ID2', 2, 'B1', 'CB2' FROM DUAL
    UNION ALL SELECT 'ID2', 3, 'B2', 'CB2' FROM DUAL;
那么剩下的行是:
-- List the surviving rows. The explicit ORDER BY makes the listing
-- deterministic; without it the rows come back in no guaranteed order.
SELECT
    id,
    book_no,
    book_name,
    book_category
FROM table_name
ORDER BY id, book_no;
ID | BOOK_NO | BOOK_NAME | BOOK_CATEGORY |
---|---|---|---|
ID1 | 2 | B1 | CB1 |
ID1 | 4 | B2 | CB1 |
ID1 | 5 | B3 | CB1 |
ID2 | 2 | B1 | CB2 |
ID2 | 3 | B2 | CB2 |
sqlfiddle here
您可以使用 first_value 查找所需的 ID,也可以使用 delete 删除其他 ID。
-- For every (id, book_name, book_category) group, report the highest
-- book_no, i.e. the row to keep. FIRST_VALUE over a descending sort with a
-- frame spanning the whole partition yields that value on every row of the
-- group, and DISTINCT collapses each group to a single output row.
SELECT DISTINCT
    b.id,
    b.book_name,
    b.book_category,
    FIRST_VALUE(b.book_no) OVER (
        PARTITION BY b.id, b.book_name, b.book_category
        ORDER BY b.book_no DESC
        RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS highest_book_no
FROM books b;
-- Generic template: column_names_to_be_grouped stands for the columns that
-- identify a set of duplicates, column_name for the column whose highest
-- value should be kept, and tables for the table being queried.
SELECT DISTINCT
    column_names_to_be_grouped,
    FIRST_VALUE(column_name) OVER (
        PARTITION BY column_names_to_be_grouped
        ORDER BY column_name DESC
        RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS highest_value
FROM tables;