PLSQL:如何计算列中所有值中子字符串的出现次数
PLSQL: How do I count the occurrences of a substring in all values in a column
我在 Oracle 数据库中有这样一个 table:
substring
string
abc
123-def-abc
def
123-def
ghi
123-def-ghi
jkl
123-456-jkl
mno
123-456-jkl-mno
我需要计算每个 substring
在 string
列的所有行中出现的次数。示例结果为:
substring
string
count
abc
123-def-abc
1
def
123-def
3
ghi
123-def-ghi
1
jkl
123-456-jkl
2
mno
123-456-jkl-mno
1
我怎样才能达到这个结果?
这可能是一种选择。
示例数据
SQL> with test as -- inline sample rows standing in for the real table
2 (select 'abc' as substring, '123-def-abc' as string from dual union all
3 select 'def', '123-def' from dual union all
4 select 'ghi', '123-def-ghi' from dual union all
5 select 'jkl', '123-456-jkl' from dual union all
6 select 'mno', '123-456-jkl-mno' from dual
7 ),
检查 substring
是否存在于 string
中(并使用交叉连接,因为您必须检查 所有组合)
8 temp as
9 (select a.substring,
10 case when instr(b.string, a.substring) > 0 then 1 else 0 end cnt -- instr is 1-based and returns 0 when not found, so > 0 also counts a match at position 1
11 from test a cross join test b
12 -- one 0/1 flag per (substring row, string row) pair; the flags are summed in the final select
13 )
最后,将“原始”table 与 temp
CTE 连接,并返回汇总后的结果:
14 select src.substring, src.string, sum(hit.cnt) as cnt -- total of temp's 0/1 flags per source row
15 from test src inner join temp hit on hit.substring = src.substring
16 group by src.substring, src.string
17 order by src.substring;
SUB STRING CNT
--- --------------- ----------
abc 123-def-abc 1
def 123-def 3
ghi 123-def-ghi 1
jkl 123-456-jkl 2
mno 123-456-jkl-mno 1
SQL>
几个解决方案:
DBFiddle
cross apply
(或横向或交叉连接):
-- For every row of t, count the rows of t whose string matches '%<substring>%'.
select *
from t
cross apply (
    select count(*) as cnt
    from t other_row
    where other_row.string like '%' || t.substring || '%'
) match_count
order by substring;
- 子查询:
-- Same count, written as a correlated scalar subquery in the select list.
select
    t.*,
    (select count(*)
       from t inner_t
      where inner_t.string like '%' || t.substring || '%') as cnt
from t
order by substring;
connect by
:
-- Counts, per (substring, string) row of t, the rows produced by a two-level
-- hierarchical self-join of t on "string contains the parent's substring".
select
substring,string,count(*)
from (
select
-- connect_by_root carries the root row's values down to every row of its tree
connect_by_root substring as substring
,connect_by_root string as string
from t
-- there is no START WITH clause; per Oracle's hierarchical query rules every row of t then acts as a root
connect by nocycle
-- level<=2 stops the walk after the root (level 1) and its direct children (level 2)
level<=2
-- a row becomes a child when its string matches '%<parent substring>%'
and string like '%'||(prior t.substring)||'%'
-- NOTE(review): presumably NOCYCLE keeps a row from being attached as its own child,
-- so the root row itself stands in for the self-match in count(*) -- confirm
)
group by substring,string
order by substring;
model
:
-- MODEL clause variant: for every (substring, string) cell, set cnt to the
-- number of cells whose string matches '%<current substring>%'.
select
substring,string,cnt
from t
model
dimension by (substring,string)
measures(0 as cnt,string as string2)
rules(
-- cv(substring) names the dimension explicitly: a bare cv() is resolved by its
-- position in the cell reference, which here would be the string dimension and
-- would count strings containing the current *string* instead of the substring.
cnt[any,any] order by substring = count(*)[any,string like '%'||cv(substring)||'%']
)
order by substring;
- xmlquery + xmlagg:
-- XQuery variant: all string values are aggregated into one XML document and
-- XPath contains() is evaluated against it for each row's substring.
-- The --+ comment directly after SELECT is an optimizer hint.
-- NOTE(review): presumably NO_XML_QUERY_REWRITE is needed so the XQuery is evaluated as written -- confirm
select--+ NO_XML_QUERY_REWRITE
substring,string,
-- xmlcast(... as number) converts the XQuery result to a numeric cnt column
xmlcast(
xmlquery(
-- counts the VAL elements whose text contains $X
'count($D/ROW/VAL[contains(., $X)])'
passing
-- xmlagg(...) over() has an empty window, so it aggregates one VAL element per row across all rows of t;
-- the result is wrapped in a single ROW element and bound to the XQuery variable $D
xmlelement("ROW", (xmlagg(xmlelement(VAL, string)) over())) as d,
-- the current row's substring is bound to $X
substring as x
returning content) as number) as cnt
from t
order by substring;
这可以通过在 CTE 中两次引用原始表来完成:一次使用 distinct 获取不重复的子字符串,另一次获取所有字符串,并将两者交叉连接(cross join)。然后我们使用聚合来计算每个匹配项的出现次数。
-- Fixture table of (substrin, strin) pairs.
create table if not exists strings (
    substrin varchar(3),
    strin    varchar(20)
);

-- Empty the whole table on purpose so re-running the script does not accumulate rows.
delete from strings;

-- The 'def' and 'jkl' pairs are inserted twice.
insert into strings (substrin, strin) values
    ('abc', '123-def-abc'),
    ('def', '123-def'),
    ('def', '123-def'),
    ('ghi', '123-def-ghi'),
    ('jkl', '123-456-jkl'),
    ('jkl', '123-456-jkl'),
    ('mno', '123-456-jkl-mno');

-- pairs: every distinct substring combined with every stored string.
with pairs as (
    select
        needle.substrin as sb,
        hay.strin as sg
    from (select distinct substrin from strings) as needle
    cross join strings as hay
)
-- Keep the pairs where the string contains the substring and count rows per pair.
select
    sb as 'substring',
    sg as 'string',
    count(*) as 'count'
from pairs
where sg like concat('%', sb, '%')
group by sg, sb;
请注意,这不会给出与子字符串匹配的总行数,而是每个(子字符串,字符串)组合出现的次数。
我在 Oracle 数据库中有这样一个 table:
substring | string |
---|---|
abc | 123-def-abc |
def | 123-def |
ghi | 123-def-ghi |
jkl | 123-456-jkl |
mno | 123-456-jkl-mno |
我需要计算每个 substring
在 string
列的所有行中出现的次数。示例结果为:
substring | string | count |
---|---|---|
abc | 123-def-abc | 1 |
def | 123-def | 3 |
ghi | 123-def-ghi | 1 |
jkl | 123-456-jkl | 2 |
mno | 123-456-jkl-mno | 1 |
我怎样才能达到这个结果?
这可能是一种选择。
示例数据
SQL> with test as -- inline sample rows standing in for the real table
2 (select 'abc' as substring, '123-def-abc' as string from dual union all
3 select 'def', '123-def' from dual union all
4 select 'ghi', '123-def-ghi' from dual union all
5 select 'jkl', '123-456-jkl' from dual union all
6 select 'mno', '123-456-jkl-mno' from dual
7 ),
检查 substring
是否存在于 string
中(并使用交叉连接,因为您必须检查 所有组合)
8 temp as
9 (select a.substring,
10 case when instr(b.string, a.substring) > 0 then 1 else 0 end cnt -- instr is 1-based and returns 0 when not found, so > 0 also counts a match at position 1
11 from test a cross join test b
12 -- one 0/1 flag per (substring row, string row) pair; the flags are summed in the final select
13 )
最后,将“原始”table 与 temp
CTE 连接,并返回汇总后的结果:
14 select src.substring, src.string, sum(hit.cnt) as cnt -- total of temp's 0/1 flags per source row
15 from test src inner join temp hit on hit.substring = src.substring
16 group by src.substring, src.string
17 order by src.substring;
SUB STRING CNT
--- --------------- ----------
abc 123-def-abc 1
def 123-def 3
ghi 123-def-ghi 1
jkl 123-456-jkl 2
mno 123-456-jkl-mno 1
SQL>
几个解决方案: DBFiddle
cross apply
(或横向或交叉连接):
-- For every row of t, count the rows of t whose string matches '%<substring>%'.
select *
from t
cross apply (
    select count(*) as cnt
    from t other_row
    where other_row.string like '%' || t.substring || '%'
) match_count
order by substring;
- 子查询:
-- Same count, written as a correlated scalar subquery in the select list.
select
    t.*,
    (select count(*)
       from t inner_t
      where inner_t.string like '%' || t.substring || '%') as cnt
from t
order by substring;
connect by
:
-- Counts, per (substring, string) row of t, the rows produced by a two-level
-- hierarchical self-join of t on "string contains the parent's substring".
select
substring,string,count(*)
from (
select
-- connect_by_root carries the root row's values down to every row of its tree
connect_by_root substring as substring
,connect_by_root string as string
from t
-- there is no START WITH clause; per Oracle's hierarchical query rules every row of t then acts as a root
connect by nocycle
-- level<=2 stops the walk after the root (level 1) and its direct children (level 2)
level<=2
-- a row becomes a child when its string matches '%<parent substring>%'
and string like '%'||(prior t.substring)||'%'
-- NOTE(review): presumably NOCYCLE keeps a row from being attached as its own child,
-- so the root row itself stands in for the self-match in count(*) -- confirm
)
group by substring,string
order by substring;
model
:
-- MODEL clause variant: for every (substring, string) cell, set cnt to the
-- number of cells whose string matches '%<current substring>%'.
select
substring,string,cnt
from t
model
dimension by (substring,string)
measures(0 as cnt,string as string2)
rules(
-- cv(substring) names the dimension explicitly: a bare cv() is resolved by its
-- position in the cell reference, which here would be the string dimension and
-- would count strings containing the current *string* instead of the substring.
cnt[any,any] order by substring = count(*)[any,string like '%'||cv(substring)||'%']
)
order by substring;
- xmlquery + xmlagg:
-- XQuery variant: all string values are aggregated into one XML document and
-- XPath contains() is evaluated against it for each row's substring.
-- The --+ comment directly after SELECT is an optimizer hint.
-- NOTE(review): presumably NO_XML_QUERY_REWRITE is needed so the XQuery is evaluated as written -- confirm
select--+ NO_XML_QUERY_REWRITE
substring,string,
-- xmlcast(... as number) converts the XQuery result to a numeric cnt column
xmlcast(
xmlquery(
-- counts the VAL elements whose text contains $X
'count($D/ROW/VAL[contains(., $X)])'
passing
-- xmlagg(...) over() has an empty window, so it aggregates one VAL element per row across all rows of t;
-- the result is wrapped in a single ROW element and bound to the XQuery variable $D
xmlelement("ROW", (xmlagg(xmlelement(VAL, string)) over())) as d,
-- the current row's substring is bound to $X
substring as x
returning content) as number) as cnt
from t
order by substring;
这可以通过在 CTE 中两次引用原始表来完成:一次使用 distinct 获取不重复的子字符串,另一次获取所有字符串,并将两者交叉连接(cross join)。然后我们使用聚合来计算每个匹配项的出现次数。
-- Fixture table of (substrin, strin) pairs.
create table if not exists strings (
    substrin varchar(3),
    strin    varchar(20)
);

-- Empty the whole table on purpose so re-running the script does not accumulate rows.
delete from strings;

-- The 'def' and 'jkl' pairs are inserted twice.
insert into strings (substrin, strin) values
    ('abc', '123-def-abc'),
    ('def', '123-def'),
    ('def', '123-def'),
    ('ghi', '123-def-ghi'),
    ('jkl', '123-456-jkl'),
    ('jkl', '123-456-jkl'),
    ('mno', '123-456-jkl-mno');

-- pairs: every distinct substring combined with every stored string.
with pairs as (
    select
        needle.substrin as sb,
        hay.strin as sg
    from (select distinct substrin from strings) as needle
    cross join strings as hay
)
-- Keep the pairs where the string contains the substring and count rows per pair.
select
    sb as 'substring',
    sg as 'string',
    count(*) as 'count'
from pairs
where sg like concat('%', sb, '%')
group by sg, sb;
请注意,这不会给出与子字符串匹配的总行数,而是每个(子字符串,字符串)组合出现的次数。