Oracle | 使用 GROUP BY 获取去重后的逗号分隔值
Oracle | Group by to fetch unique comma separated values
我有一张表,其中 SKILLS 列保存的是以逗号分隔的值。同一个学生的技能在不同行之间可能重复。我需要使用 GROUP BY 列出每个学生不重复的技能。
我应该怎么写才能得到没有重复项的列表?请帮忙。
输入表 student_skills:
STUDENT_ID | SEMESTER | SKILLS |
---|---|---|
101 | 1 | C, SQL |
101 | 2 | C, CPP |
102 | 1 | CPP, Java |
102 | 2 | Java, JavaScript |
想要的结果:
STUDENT_ID | SKILLS |
---|---|
101 | C, SQL, CPP |
102 | CPP, Java, JavaScript |
用于建表并插入示例数据的 SQL 语句:
-- Sample schema for the question: one row per student and semester, with the
-- skills acquired in that semester stored as a comma-separated string.
CREATE TABLE student_skills (
    student_id NUMBER(10),
    semester   VARCHAR2(5),
    skills     VARCHAR2(50)
);

-- Load the four sample rows in a single multi-table insert.
INSERT ALL
    INTO student_skills (student_id, semester, skills) VALUES (101, '1', 'C, SQL')
    INTO student_skills (student_id, semester, skills) VALUES (101, '2', 'C, CPP')
    INTO student_skills (student_id, semester, skills) VALUES (102, '1', 'CPP, Java')
    INTO student_skills (student_id, semester, skills) VALUES (102, '2', 'Java, JavaScript')
SELECT 1 FROM dual;

COMMIT;
一种做法是先把技能拆分成多行(temp CTE),然后再把它们聚合回去(第 11 行):
-- SQL*Plus session transcript (the leading 2..13 are SQL*Plus prompt line
-- numbers, not part of the statement), followed by the captured output.
-- Prompt lines 1-9: the TEMP CTE produces distinct (student_id, skill) pairs.
--   * The MULTISET subquery yields the numbers 1 .. (number of commas + 1)
--     for the current row's SKILLS string, exposed as COLUMN_VALUE.
--   * REGEXP_SUBSTR picks the COLUMN_VALUE-th run of non-comma characters,
--     and TRIM strips the blank that follows each comma.
-- Prompt lines 10-13: LISTAGG rebuilds one list per student, sorted by skill
-- name, which is why the output shows "C, CPP, SQL" rather than the order in
-- which the skills first appear.
SQL> with temp as
2 (select distinct
3 student_id,
4 trim(regexp_substr(skills, '[^,]+', 1, column_value)) skill
5 from student_skills cross join
6 table(cast(multiset(select level from dual
7 connect by level <= regexp_count(skills, ',') + 1
8 ) as sys.odcinumberlist))
9 )
10 select student_id,
11 listagg(skill, ', ') within group (order by skill) skills
12 from temp
13 group by student_id;
STUDENT_ID SKILLS
---------- ------------------------------
101 C, CPP, SQL
102 CPP, Java, JavaScript
SQL>
您可以在子查询中一边提取以逗号分隔的子字符串,一边用 DISTINCT 子句去重,然后应用 LISTAGG() 函数把这些片段重新组合起来,例如:
-- Split each comma-separated SKILLS string into one row per skill,
-- de-duplicate per student, then re-aggregate into a single list.
WITH s AS
(
 SELECT DISTINCT
        student_id,
        -- TRIM removes the blank that follows each comma; without it ' Java'
        -- and 'Java' are different values and survive the DISTINCT.
        TRIM(REGEXP_SUBSTR(skills, '[^,]+', 1, LEVEL)) AS skills
   FROM student_skills
CONNECT BY LEVEL <= REGEXP_COUNT(skills, ',') + 1
    -- Keep every generated row attached to its own source row. student_id
    -- alone is not unique (one row per semester), so semester is needed too;
    -- otherwise the hierarchy crosses between a student's rows and generates
    -- redundant rows. Assumes (student_id, semester) identifies a row; if it
    -- does not, the result is still correct thanks to DISTINCT, only slower.
    AND PRIOR student_id = student_id
    AND PRIOR semester = semester
    -- Usual idiom: a non-deterministic PRIOR expression keeps Oracle from
    -- reporting a CONNECT BY loop (ORA-01436) on the self-referencing rows.
    AND PRIOR sys_guid() IS NOT NULL
)
-- Order by the skill itself so the list is deterministic; a constant sort key
-- leaves the order of the aggregated values unspecified.
SELECT student_id,
       LISTAGG(skills, ', ') WITHIN GROUP (ORDER BY skills) AS skills
  FROM s
 GROUP BY student_id
如果您的输入数据是第一范式,生活会轻松很多...事实上,您必须先拆分字符串。毫无疑问,那些逗号分隔的字符串首先是由聚合生成的;难道不能恢复到更早的阶段,每行显示一个技能吗?
假设你在这件事上没有发言权,那就必须先拆分输入字符串,然后才能去重并再次聚合。一种技巧是借助 JSON 函数来完成拆分,例如:
-- Rewrite each SKILLS string as a JSON array literal ('C, SQL' -> ["C","SQL"]),
-- unnest it with JSON_TABLE into one row per skill, then rebuild a
-- duplicate-free list per student.
-- The comma join is kept on purpose: JSON_TABLE refers to ss.skills, so it is
-- evaluated against each student_skills row.
-- NOTE(review): the WITHIN GROUP ordering looks intended to list skills in
-- order of first appearance (semester, then position in the string) - confirm
-- how the target Oracle version combines it with DISTINCT.
SELECT
    ss.student_id,
    LISTAGG(DISTINCT jt.skill, ', ')
        WITHIN GROUP (ORDER BY ss.semester, jt.ord) AS skills
FROM
    student_skills ss,
    JSON_TABLE(
        '["' || REPLACE(ss.skills, ', ', '","') || '"]',
        '$[*]'
        COLUMNS (
            skill PATH '$',
            ord   FOR ORDINALITY
        )
    ) jt
GROUP BY
    ss.student_id
ORDER BY
    ss.student_id
;
您也可以使用 XPath/XQuery 函数 distinct-values() 找出不重复的值:
-- Step 1 (skills_xml): per student, wrap every skill in an <a> element and
-- concatenate all semesters, in semester order, under a single <b> root.
WITH skills_xml AS (
    SELECT student_id,
           '<b>'
           || LISTAGG('<a>' || REPLACE(skills, ', ', '</a><a>') || '</a>')
                  WITHIN GROUP (ORDER BY semester)
           || '</b>' AS skills
    FROM   student_skills
    GROUP  BY student_id
)
-- Step 2: parse that string as XML, let XQuery's distinct-values() drop the
-- repeated text nodes, and string-join() glue the rest back together.
SELECT sx.student_id,
       XMLQUERY(
           'string-join(distinct-values(//text()), ", ")'
           PASSING XMLTYPE(sx.skills)
           RETURNING CONTENT
       ).getStringVal() AS skills
FROM   skills_xml sx;
对于示例数据,其输出为:
STUDENT_ID | SKILLS |
---|---|
101 | C, SQL, CPP |
102 | CPP, Java, JavaScript |
db<>fiddle here
我有一张表,其中 SKILLS 列保存的是以逗号分隔的值。同一个学生的技能在不同行之间可能重复。我需要使用 GROUP BY 列出每个学生不重复的技能。
我应该怎么写才能得到没有重复项的列表?请帮忙。
输入表 student_skills:
STUDENT_ID | SEMESTER | SKILLS |
---|---|---|
101 | 1 | C, SQL |
101 | 2 | C, CPP |
102 | 1 | CPP, Java |
102 | 2 | Java, JavaScript |
想要的结果:
STUDENT_ID | SKILLS |
---|---|
101 | C, SQL, CPP |
102 | CPP, Java, JavaScript |
用于建表并插入示例数据的 SQL 语句:
-- Sample schema for the question: one row per student and semester, with the
-- skills acquired in that semester stored as a comma-separated string.
CREATE TABLE student_skills (
    student_id NUMBER(10),
    semester   VARCHAR2(5),
    skills     VARCHAR2(50)
);

-- Load the four sample rows in a single multi-table insert.
INSERT ALL
    INTO student_skills (student_id, semester, skills) VALUES (101, '1', 'C, SQL')
    INTO student_skills (student_id, semester, skills) VALUES (101, '2', 'C, CPP')
    INTO student_skills (student_id, semester, skills) VALUES (102, '1', 'CPP, Java')
    INTO student_skills (student_id, semester, skills) VALUES (102, '2', 'Java, JavaScript')
SELECT 1 FROM dual;

COMMIT;
一种做法是先把技能拆分成多行(temp CTE),然后再把它们聚合回去(第 11 行):
-- SQL*Plus session transcript (the leading 2..13 are SQL*Plus prompt line
-- numbers, not part of the statement), followed by the captured output.
-- Prompt lines 1-9: the TEMP CTE produces distinct (student_id, skill) pairs.
--   * The MULTISET subquery yields the numbers 1 .. (number of commas + 1)
--     for the current row's SKILLS string, exposed as COLUMN_VALUE.
--   * REGEXP_SUBSTR picks the COLUMN_VALUE-th run of non-comma characters,
--     and TRIM strips the blank that follows each comma.
-- Prompt lines 10-13: LISTAGG rebuilds one list per student, sorted by skill
-- name, which is why the output shows "C, CPP, SQL" rather than the order in
-- which the skills first appear.
SQL> with temp as
2 (select distinct
3 student_id,
4 trim(regexp_substr(skills, '[^,]+', 1, column_value)) skill
5 from student_skills cross join
6 table(cast(multiset(select level from dual
7 connect by level <= regexp_count(skills, ',') + 1
8 ) as sys.odcinumberlist))
9 )
10 select student_id,
11 listagg(skill, ', ') within group (order by skill) skills
12 from temp
13 group by student_id;
STUDENT_ID SKILLS
---------- ------------------------------
101 C, CPP, SQL
102 CPP, Java, JavaScript
SQL>
您可以在子查询中一边提取以逗号分隔的子字符串,一边用 DISTINCT 子句去重,然后应用 LISTAGG() 函数把这些片段重新组合起来:
-- Split each comma-separated SKILLS string into one row per skill,
-- de-duplicate per student, then re-aggregate into a single list.
WITH s AS
(
 SELECT DISTINCT
        student_id,
        -- TRIM removes the blank that follows each comma; without it ' Java'
        -- and 'Java' are different values and survive the DISTINCT.
        TRIM(REGEXP_SUBSTR(skills, '[^,]+', 1, LEVEL)) AS skills
   FROM student_skills
CONNECT BY LEVEL <= REGEXP_COUNT(skills, ',') + 1
    -- Keep every generated row attached to its own source row. student_id
    -- alone is not unique (one row per semester), so semester is needed too;
    -- otherwise the hierarchy crosses between a student's rows and generates
    -- redundant rows. Assumes (student_id, semester) identifies a row; if it
    -- does not, the result is still correct thanks to DISTINCT, only slower.
    AND PRIOR student_id = student_id
    AND PRIOR semester = semester
    -- Usual idiom: a non-deterministic PRIOR expression keeps Oracle from
    -- reporting a CONNECT BY loop (ORA-01436) on the self-referencing rows.
    AND PRIOR sys_guid() IS NOT NULL
)
-- Order by the skill itself so the list is deterministic; a constant sort key
-- leaves the order of the aggregated values unspecified.
SELECT student_id,
       LISTAGG(skills, ', ') WITHIN GROUP (ORDER BY skills) AS skills
  FROM s
 GROUP BY student_id
如果您的输入数据是第一范式,生活会轻松很多...事实上,您必须先拆分字符串。毫无疑问,那些逗号分隔的字符串首先是由聚合生成的;难道不能恢复到更早的阶段,每行显示一个技能吗?
假设你在这件事上没有发言权,那就必须先拆分输入字符串,然后才能去重并再次聚合。一种技巧是借助 JSON 函数来完成拆分,例如:
-- Rewrite each SKILLS string as a JSON array literal ('C, SQL' -> ["C","SQL"]),
-- unnest it with JSON_TABLE into one row per skill, then rebuild a
-- duplicate-free list per student.
-- The comma join is kept on purpose: JSON_TABLE refers to ss.skills, so it is
-- evaluated against each student_skills row.
-- NOTE(review): the WITHIN GROUP ordering looks intended to list skills in
-- order of first appearance (semester, then position in the string) - confirm
-- how the target Oracle version combines it with DISTINCT.
SELECT
    ss.student_id,
    LISTAGG(DISTINCT jt.skill, ', ')
        WITHIN GROUP (ORDER BY ss.semester, jt.ord) AS skills
FROM
    student_skills ss,
    JSON_TABLE(
        '["' || REPLACE(ss.skills, ', ', '","') || '"]',
        '$[*]'
        COLUMNS (
            skill PATH '$',
            ord   FOR ORDINALITY
        )
    ) jt
GROUP BY
    ss.student_id
ORDER BY
    ss.student_id
;
您也可以使用 XPath/XQuery 函数 distinct-values() 找出不重复的值:
-- Step 1 (skills_xml): per student, wrap every skill in an <a> element and
-- concatenate all semesters, in semester order, under a single <b> root.
WITH skills_xml AS (
    SELECT student_id,
           '<b>'
           || LISTAGG('<a>' || REPLACE(skills, ', ', '</a><a>') || '</a>')
                  WITHIN GROUP (ORDER BY semester)
           || '</b>' AS skills
    FROM   student_skills
    GROUP  BY student_id
)
-- Step 2: parse that string as XML, let XQuery's distinct-values() drop the
-- repeated text nodes, and string-join() glue the rest back together.
SELECT sx.student_id,
       XMLQUERY(
           'string-join(distinct-values(//text()), ", ")'
           PASSING XMLTYPE(sx.skills)
           RETURNING CONTENT
       ).getStringVal() AS skills
FROM   skills_xml sx;
对于示例数据,其输出为:
STUDENT_ID | SKILLS |
---|---|
101 | C, SQL, CPP |
102 | CPP, Java, JavaScript |
db<>fiddle here