Sql 使用 regexp_substr 的查询很慢,并且在 table 中插入更多记录后超时
Sql query with regexp_substr is slow and timing out once more records are inserted in table
我有一个table
cbcm(REPORT_NAME varchar2(30), WHERE_CLAUSE varchar2(2000))
insert into cbcm(REPORT_NAME,WHERE_CLAUSE)
values('SE_SUPP2','29786399,29271272,29815958,29821597,29821140,29821791,29850566')
此处 WHERE_CLAUSE
具有整数 (id) 值,这些值用“,”分隔值插入。
第二个 table:
cust_bug_data(id integer, name varchar2(20))
其 id 的取值即 table cbcm 的 WHERE_CLAUSE 中列出的值。
-- Sample row whose id is one of the values listed in cbcm.WHERE_CLAUSE.
insert into cust_bug_data(id, name)
values(29786399,'test')
现在我想列出 cust_bug_data
中的记录,其中 id
来自 cbcm
table 中的特定 REPORT_NAME
,如下所示:
-- Lists the cust_bug_data rows whose id appears in the comma-separated
-- WHERE_CLAUSE stored for report 'SE_SUPP2'.
select *
from cust_bug_data
-- NOTE(review): id is an integer while regexp_substr returns a string, so this
-- comparison relies on implicit conversion.
where id in(
-- Level N of the hierarchy yields the N-th comma-separated item of the list.
select regexp_substr(WHERE_CLAUSE,'[^,]+',1,level) WHERE_CLAUSE
from cbcm
where REPORT_NAME='SE_SUPP2'
-- NOTE(review): the CONNECT BY condition has no PRIOR term tying a child row to
-- its parent, so with several rows in cbcm the hierarchy presumably fans out
-- across all rows at every level -- a likely cause of the slowdown; confirm
-- with the execution plan.
connect by regexp_substr(WHERE_CLAUSE,'[^,]+',1,level) is not null
)
Above query is taking 12 secs for column values `WHERE_CLAUSE` having 40 "," delimited values. Now when I inserted another record in `cbcm` having 90 "," delimited values, above query is getting timed out.
Could you please suggest a way for the same.
检查一个值是否是另一个值的子字符串:
-- Semi-join: keep a cust_bug_data row when its id occurs as a whole item in the
-- comma-separated list stored for report 'SE_SUPP2'.
SELECT *
FROM   cust_bug_data cbd
WHERE  EXISTS (
         SELECT 1
         FROM   cbcm c
         WHERE  c.REPORT_NAME = 'SE_SUPP2'
         -- Wrapping both sides in commas prevents partial matches
         -- (e.g. 123 matching inside 1234).
         AND    ',' || c.WHERE_CLAUSE || ',' LIKE '%,' || cbd.id || ',%'
       )
输出:
ID | NAME
-------: | :---
29786399 | test
db<>fiddle here
或者不要在数据库中存储分隔字符串:
-- One row per report.
CREATE TABLE cbcm(
  REPORT_NAME varchar2(30) PRIMARY KEY
);
-- One row per (report, id) pair; replaces the comma-separated list.
CREATE TABLE cbcm_where(
  REPORT_NAME varchar2(30) REFERENCES cbcm ( REPORT_NAME ),
  WHERE_CLAUSE integer,
  PRIMARY KEY ( REPORT_NAME, WHERE_CLAUSE )
);
CREATE TABLE cust_bug_data(id integer, name varchar2(20));
insert into cbcm (REPORT_NAME ) values('SE_SUPP2');
-- Explicit column list so the insert does not depend on column order.
insert into cust_bug_data ( id, name ) VALUES ( 29786399, 'test');
插入列表时将其拆分(这里是一种使用更快的字符串函数而不是慢速正则表达式的方法):
-- Splits a comma-separated id list into one cbcm_where row per id, using
-- INSTR/SUBSTR in a recursive subquery instead of regular expressions.
insert into cbcm_where ( REPORT_NAME, WHERE_CLAUSE )
-- The report name and the delimited list to split.
WITH list_to_insert ( report_name, list ) AS (
SELECT 'SE_SUPP2', '29786399,29271272,29815958,29821597,29821140,29821791,29850566' FROM DUAL
),
-- One row per list item: startidx is the position of the item's first
-- character, endidx the position of the comma that follows it
-- (0 for the last item, where INSTR finds no further comma).
bounds ( report_name, list, startidx, endidx ) AS (
SELECT report_name,
list,
1,
INSTR( list, ',', 1 )
FROM list_to_insert
UNION ALL
SELECT report_name,
list,
endidx + 1,
INSTR( list, ',', endidx + 1 )
FROM bounds
-- Stop recursing once the last item (no following comma) has been produced.
WHERE endidx > 0
)
SELECT report_name,
TO_NUMBER(
CASE
-- Last item: take everything from startidx to the end of the list.
WHEN endidx = 0
THEN SUBSTR( list, startidx )
-- Otherwise take the characters between startidx and the next comma.
ELSE SUBSTR( list, startidx, endidx - startidx )
END
)
FROM bounds
然后查询你的数据:
-- Return every cust_bug_data row whose id is registered for report 'SE_SUPP2'
-- in the normalized cbcm_where table.
SELECT *
FROM   cust_bug_data cbd
WHERE  EXISTS (
         SELECT 1
         FROM   cbcm_where w
         WHERE  w.WHERE_CLAUSE = cbd.id
         AND    w.REPORT_NAME = 'SE_SUPP2'
       )
ID | NAME
-------: | :---
29786399 | test
db<>fiddle here
在这里,您的查询需要时间,因为 IN
子句是针对 cust_bug_data
的每一行进行评估的,而 IN
子句正在执行需要时间的 connect by
查询。
您可以使用 regexp_like
直接加入,如下所示:
-- Joins each cust_bug_data row to the report whose comma-wrapped list contains
-- its comma-wrapped id. Only cust_bug_data columns are returned so the output
-- has the same shape as the original query.
select cbd.*
from cust_bug_data cbd
Join cbcm on regexp_like(','|| cbcm.where_clause ||',' ,','|| cbd.id ||',')
-- Restrict to the report used in the question.
WHERE cbcm.report_name = 'SE_SUPP2'
干杯!!
我有一个table
cbcm(REPORT_NAME varchar2(30), WHERE_CLAUSE varchar2(2000))
insert into cbcm(REPORT_NAME,WHERE_CLAUSE)
values('SE_SUPP2','29786399,29271272,29815958,29821597,29821140,29821791,29850566')
此处 WHERE_CLAUSE
具有整数 (id) 值,这些值用“,”分隔值插入。
第二个 table:
cust_bug_data(id integer, name varchar2(20))
其 id 的取值即 table cbcm 的 WHERE_CLAUSE 中列出的值。
-- Sample row whose id is one of the values listed in cbcm.WHERE_CLAUSE.
insert into cust_bug_data(id, name)
values(29786399,'test')
现在我想列出 cust_bug_data
中的记录,其中 id
来自 cbcm
table 中的特定 REPORT_NAME
,如下所示:
-- Lists the cust_bug_data rows whose id appears in the comma-separated
-- WHERE_CLAUSE stored for report 'SE_SUPP2'.
select *
from cust_bug_data
-- NOTE(review): id is an integer while regexp_substr returns a string, so this
-- comparison relies on implicit conversion.
where id in(
-- Level N of the hierarchy yields the N-th comma-separated item of the list.
select regexp_substr(WHERE_CLAUSE,'[^,]+',1,level) WHERE_CLAUSE
from cbcm
where REPORT_NAME='SE_SUPP2'
-- NOTE(review): the CONNECT BY condition has no PRIOR term tying a child row to
-- its parent, so with several rows in cbcm the hierarchy presumably fans out
-- across all rows at every level -- a likely cause of the slowdown; confirm
-- with the execution plan.
connect by regexp_substr(WHERE_CLAUSE,'[^,]+',1,level) is not null
)
Above query is taking 12 secs for column values `WHERE_CLAUSE` having 40 "," delimited values. Now when I inserted another record in `cbcm` having 90 "," delimited values, above query is getting timed out.
Could you please suggest a way for the same.
检查一个值是否是另一个值的子字符串:
-- Semi-join: keep a cust_bug_data row when its id occurs as a whole item in the
-- comma-separated list stored for report 'SE_SUPP2'.
SELECT *
FROM   cust_bug_data cbd
WHERE  EXISTS (
         SELECT 1
         FROM   cbcm c
         WHERE  c.REPORT_NAME = 'SE_SUPP2'
         -- Wrapping both sides in commas prevents partial matches
         -- (e.g. 123 matching inside 1234).
         AND    ',' || c.WHERE_CLAUSE || ',' LIKE '%,' || cbd.id || ',%'
       )
输出:
ID | NAME
-------: | :---
29786399 | test
db<>fiddle here
或者不要在数据库中存储分隔字符串:
-- One row per report.
CREATE TABLE cbcm(
  REPORT_NAME varchar2(30) PRIMARY KEY
);
-- One row per (report, id) pair; replaces the comma-separated list.
CREATE TABLE cbcm_where(
  REPORT_NAME varchar2(30) REFERENCES cbcm ( REPORT_NAME ),
  WHERE_CLAUSE integer,
  PRIMARY KEY ( REPORT_NAME, WHERE_CLAUSE )
);
CREATE TABLE cust_bug_data(id integer, name varchar2(20));
insert into cbcm (REPORT_NAME ) values('SE_SUPP2');
-- Explicit column list so the insert does not depend on column order.
insert into cust_bug_data ( id, name ) VALUES ( 29786399, 'test');
插入列表时将其拆分(这里是一种使用更快的字符串函数而不是慢速正则表达式的方法):
-- Splits a comma-separated id list into one cbcm_where row per id, using
-- INSTR/SUBSTR in a recursive subquery instead of regular expressions.
insert into cbcm_where ( REPORT_NAME, WHERE_CLAUSE )
-- The report name and the delimited list to split.
WITH list_to_insert ( report_name, list ) AS (
SELECT 'SE_SUPP2', '29786399,29271272,29815958,29821597,29821140,29821791,29850566' FROM DUAL
),
-- One row per list item: startidx is the position of the item's first
-- character, endidx the position of the comma that follows it
-- (0 for the last item, where INSTR finds no further comma).
bounds ( report_name, list, startidx, endidx ) AS (
SELECT report_name,
list,
1,
INSTR( list, ',', 1 )
FROM list_to_insert
UNION ALL
SELECT report_name,
list,
endidx + 1,
INSTR( list, ',', endidx + 1 )
FROM bounds
-- Stop recursing once the last item (no following comma) has been produced.
WHERE endidx > 0
)
SELECT report_name,
TO_NUMBER(
CASE
-- Last item: take everything from startidx to the end of the list.
WHEN endidx = 0
THEN SUBSTR( list, startidx )
-- Otherwise take the characters between startidx and the next comma.
ELSE SUBSTR( list, startidx, endidx - startidx )
END
)
FROM bounds
然后查询你的数据:
-- Return every cust_bug_data row whose id is registered for report 'SE_SUPP2'
-- in the normalized cbcm_where table.
SELECT *
FROM   cust_bug_data cbd
WHERE  EXISTS (
         SELECT 1
         FROM   cbcm_where w
         WHERE  w.WHERE_CLAUSE = cbd.id
         AND    w.REPORT_NAME = 'SE_SUPP2'
       )
ID | NAME
-------: | :---
29786399 | test
db<>fiddle here
在这里,您的查询需要时间,因为 IN
子句是针对 cust_bug_data
的每一行进行评估的,而 IN
子句正在执行需要时间的 connect by
查询。
您可以使用 regexp_like
直接加入,如下所示:
-- Joins each cust_bug_data row to the report whose comma-wrapped list contains
-- its comma-wrapped id. Only cust_bug_data columns are returned so the output
-- has the same shape as the original query.
select cbd.*
from cust_bug_data cbd
Join cbcm on regexp_like(','|| cbcm.where_clause ||',' ,','|| cbd.id ||',')
-- Restrict to the report used in the question.
WHERE cbcm.report_name = 'SE_SUPP2'
干杯!!