在 Oracle SQL 中按不同值分组和排序
Grouping and Sorting By distinct values in Oracle's SQL
我有 table 这样的:
id
full_name
1
John Smith
2
Smith John
3
Jim Jonson
4
JimJonson
我想得到这样的东西:
id
full_name
1
John Smith
3
Jim Jonson
所以,我需要SELECT DISTINCT full_name FROM table
,这样
John Smith
和Smith John
是一回事,也是
Jim Jonson
和 JimJonson
希望我解释得很好。你能帮帮我吗?
循序渐进。阅读代码中的注释。
SQL> with test (id, full_name) as
2 -- sample data
3 (select 1, 'John Smith' from dual union all
4 select 2, 'Smith John' from dual union all
5 select 3, 'Jim Jonson' from dual union all
6 select 4, 'JimJonson' from dual
7 ),
8 temp as
9 -- split full name to rows
10 (select id,
11 regexp_substr(full_name, '[^ ]+', 1, column_value) val,
12 column_value cv
13 from test cross join
14 table(cast(multiset(select level from dual
15 connect by level <= regexp_count(full_name, ' ') + 1
16 ) as sys.odcinumberlist))
17 ),
18 temp2 as
19 -- aggregate full name with no space between "words"
20 (select id,
21 listagg(val, '') within group (order by val) full_name
22 from temp
23 group by id
24 ),
25 temp3 as
26 -- fetch only distinct values
27 (select min(b.id) id,
28 b.full_name
29 from temp2 b
30 group by b.full_name
31 )
32 -- finally, join TEMP3 and sample data
33 select b.id,
34 a.full_name
35 from test a join temp3 b on b.id = a.id
36 order by a.id;
ID FULL_NAME
---------- ----------------------------------------
1 John Smith
3 Jim Jonson
SQL>
您可以将 full_name 值按姓名和姓氏的首字母大写拆分,并按字母顺序排序,然后使用 LISTAGG()
函数合并,并在以下位置应用 MIN()
聚合最后一步如
WITH t(id,full_name) AS
(
SELECT 1, 'John Smith' FROM dual UNION ALL
SELECT 2, 'Smith John' FROM dual UNION ALL
SELECT 3, 'Jim Jonson' FROM dual UNION ALL
SELECT 4, 'JimJonson' FROM dual
), t2 AS
(
SELECT id,
TRIM(SUBSTR(full_name,column_value,LEAD(column_value,1,LENGTH(full_name)) OVER (PARTITION BY id ORDER BY id,column_value)-1)) AS names
FROM t,
TABLE(CAST(MULTISET(SELECT REGEXP_INSTR(full_name,'[A-Z]+',1,level)
FROM dual
CONNECT BY level <= REGEXP_COUNT(full_name,'[A-Z]')) AS sys.odcivarchar2list ))
), t3 AS
(
SELECT id, LISTAGG(names,' ') WITHIN GROUP (ORDER BY id,names) AS full_name
FROM t2
GROUP BY id
)
SELECT MIN(id) AS min_id, full_name
FROM t3
GROUP BY full_name
ORDER BY min_id
我有 table 这样的:
id | full_name |
---|---|
1 | John Smith |
2 | Smith John |
3 | Jim Jonson |
4 | JimJonson |
我想得到这样的东西:
id | full_name |
---|---|
1 | John Smith |
3 | Jim Jonson |
所以,我需要SELECT DISTINCT full_name FROM table
,这样
John Smith
和Smith John
是一回事,也是Jim Jonson
和JimJonson
希望我解释得很好。你能帮帮我吗?
循序渐进。阅读代码中的注释。
SQL> with test (id, full_name) as
2 -- sample data
3 (select 1, 'John Smith' from dual union all
4 select 2, 'Smith John' from dual union all
5 select 3, 'Jim Jonson' from dual union all
6 select 4, 'JimJonson' from dual
7 ),
8 temp as
9 -- split full name to rows
10 (select id,
11 regexp_substr(full_name, '[^ ]+', 1, column_value) val,
12 column_value cv
13 from test cross join
14 table(cast(multiset(select level from dual
15 connect by level <= regexp_count(full_name, ' ') + 1
16 ) as sys.odcinumberlist))
17 ),
18 temp2 as
19 -- aggregate full name with no space between "words"
20 (select id,
21 listagg(val, '') within group (order by val) full_name
22 from temp
23 group by id
24 ),
25 temp3 as
26 -- fetch only distinct values
27 (select min(b.id) id,
28 b.full_name
29 from temp2 b
30 group by b.full_name
31 )
32 -- finally, join TEMP3 and sample data
33 select b.id,
34 a.full_name
35 from test a join temp3 b on b.id = a.id
36 order by a.id;
ID FULL_NAME
---------- ----------------------------------------
1 John Smith
3 Jim Jonson
SQL>
您可以将 full_name 值按姓名和姓氏的首字母大写拆分,并按字母顺序排序,然后使用 LISTAGG()
函数合并,并在以下位置应用 MIN()
聚合最后一步如
WITH t(id,full_name) AS
(
SELECT 1, 'John Smith' FROM dual UNION ALL
SELECT 2, 'Smith John' FROM dual UNION ALL
SELECT 3, 'Jim Jonson' FROM dual UNION ALL
SELECT 4, 'JimJonson' FROM dual
), t2 AS
(
SELECT id,
TRIM(SUBSTR(full_name,column_value,LEAD(column_value,1,LENGTH(full_name)) OVER (PARTITION BY id ORDER BY id,column_value)-1)) AS names
FROM t,
TABLE(CAST(MULTISET(SELECT REGEXP_INSTR(full_name,'[A-Z]+',1,level)
FROM dual
CONNECT BY level <= REGEXP_COUNT(full_name,'[A-Z]')) AS sys.odcivarchar2list ))
), t3 AS
(
SELECT id, LISTAGG(names,' ') WITHIN GROUP (ORDER BY id,names) AS full_name
FROM t2
GROUP BY id
)
SELECT MIN(id) AS min_id, full_name
FROM t3
GROUP BY full_name
ORDER BY min_id