如何调整 Oracle SQL 查询
How to tune an Oracle SQL query
表 temp 包含 "word" 和 "sentence" 两列。下面的代码检查句子中是否包含 word 列中的任何单词;如果存在,该单词将被替换为 URL(包含该单词本身及其 ID)。该代码在大约 1-10 行数据时可以正常工作,但表中大约有 50k 条记录,此时它会耗尽整个临时表空间(temp space)。我该如何检查并优化这个查询?
要求:有 50k 个单词和句子。如果 word 列中的任何单词出现在句子中,则将句子中的该单词替换为 URL(包含单词及其 id)。查找单词时,搜索必须不区分大小写;同时,在替换为 URL 时需要保留句子中原有的大小写。
-- Demo table: each row holds one lookup word with its id, plus a sentence
-- in which lookup words are later searched for.
CREATE TABLE temp (
    id       NUMBER,
    word     VARCHAR2(1000),
    sentence VARCHAR2(2000)
);
-- Sample rows for temp.
-- The explicit column list keeps the load independent of column order, and the
-- trailing semicolon terminates the statement so that whatever follows in the
-- script is not parsed as part of this INSERT.
-- NOTE(review): id 2 is used for both 'testing' and 'test'; kept exactly as in
-- the original sample data - confirm that this is intentional.
INSERT INTO temp (id, word, sentence)
SELECT 1, 'automation testing', 'automtestingation TeStInG TEST is popular kind of testing' FROM DUAL UNION ALL
SELECT 2, 'testing', 'manual testing' FROM DUAL UNION ALL
SELECT 2, 'test', 'test' FROM DUAL UNION ALL
SELECT 3, 'manual testing', 'this is an old method of testing' FROM DUAL UNION ALL
SELECT 4, 'punctuation', 'automation testing,manual testing,punctuation,automanual testing-testing' FROM DUAL UNION ALL
SELECT 5, 'B-number analysis', 'B-number analysis table' FROM DUAL UNION ALL
SELECT 6, 'B-number analysis table', 'testing B-number analysis' FROM DUAL UNION ALL
SELECT 7, 'Not Matched', 'testing testing testing' FROM DUAL;
SQL 类型:
-- Schema-level nested-table types: one for lists of strings, one for lists of
-- integer ids. Being SQL types, they can be used both in SQL and in PL/SQL.
CREATE TYPE stringlist AS TABLE OF VARCHAR2(4000);
/
CREATE TYPE intlist AS TABLE OF NUMBER(20, 0);
/
PLSQL函数
CREATE FUNCTION replace_words(
  word_list IN stringlist,
  id_list   IN intlist,
  sentence  IN temp.sentence%TYPE
) RETURN temp.sentence%TYPE
IS
  /*
   * Returns SENTENCE with every whole-word occurrence of an entry of
   * WORD_LIST replaced by 'http://localhost/<id>/<u><text></u>', where <id>
   * is the ID_LIST element at the same index and <text> is the matched text
   * exactly as it is spelled in SENTENCE.
   *
   * word_list : words to look for. They are searched in UPPER(sentence), so
   *             they have to be supplied in upper case in order to match.
   *             NULL entries are ignored.
   * id_list   : id_list(i) is the id emitted for word_list(i).
   * sentence  : text to scan. A NULL sentence is returned unchanged.
   *
   * Scanning goes left to right. At each step the candidate with the
   * smallest start position wins; at equal positions the entry that comes
   * first in WORD_LIST wins. Scanning then resumes right after the replaced
   * text, so replacements never overlap.
   *
   * NOTE(review): the result is accumulated in a variable anchored to
   * temp.sentence%TYPE; presumably a sentence that grows beyond that size
   * raises a value error - confirm whether that is acceptable.
   */
  p_sentence       temp.sentence%TYPE := UPPER( sentence );
  p_pos            PLS_INTEGER := 1;  -- first character not yet copied to the output
  p_min_word_index PLS_INTEGER;       -- start of the best match found in this pass
  p_word_index     PLS_INTEGER;       -- search position for the current word
  p_index          PLS_INTEGER;       -- index in word_list of the best match
  o_sentence       temp.sentence%TYPE;
BEGIN
  -- With a NULL sentence (or a NULL word, see below) INSTR returns NULL, which
  -- is never "= 0"; the search would then treat NULL as a match position and
  -- keep appending links. Nothing can match in these cases, so bail out early.
  IF sentence IS NULL OR word_list IS NULL THEN
    RETURN sentence;
  END IF;

  LOOP
    p_min_word_index := NULL;
    p_index := NULL;
    FOR i IN 1 .. word_list.COUNT LOOP
      -- A NULL (empty) word cannot occur in the sentence; skip it.
      CONTINUE WHEN word_list(i) IS NULL;
      p_word_index := p_pos;
      LOOP
        p_word_index := INSTR( p_sentence, word_list(i), p_word_index );
        EXIT WHEN p_word_index = 0;
        -- Reject hits that are glued to a word character on either side
        -- and retry one character further on.
        IF ( p_word_index > 1
            AND REGEXP_LIKE( SUBSTR( p_sentence, p_word_index - 1, 1 ), '\w' )
           )
           OR REGEXP_LIKE( SUBSTR( p_sentence, p_word_index + LENGTH( word_list(i) ), 1 ), '\w' )
        THEN
          p_word_index := p_word_index + 1;
          CONTINUE;
        END IF;
        -- Strict "<" keeps the earlier list entry when two words start at
        -- the same position.
        IF p_min_word_index IS NULL OR p_word_index < p_min_word_index THEN
          p_min_word_index := p_word_index;
          p_index := i;
        END IF;
        EXIT;
      END LOOP;
    END LOOP;
    IF p_index IS NULL THEN
      -- No further match: copy the remainder and finish.
      o_sentence := o_sentence || SUBSTR( sentence, p_pos );
      EXIT;
    ELSE
      -- Copy the text before the match, then the link built around the
      -- original-case text taken from SENTENCE.
      o_sentence := o_sentence
                 || SUBSTR( sentence, p_pos, p_min_word_index - p_pos )
                 || 'http://localhost/'
                 || id_list(p_index)
                 || '/<u>'
                 || SUBSTR( sentence, p_min_word_index, LENGTH( word_list( p_index ) ) )
                 || '</u>';
      p_pos := p_min_word_index + LENGTH( word_list( p_index ) );
    END IF;
  END LOOP;
  RETURN o_sentence;
END;
/
合并
-- Rewrites the sentence of every row of temp with the output of replace_words.
-- The inline view "lists" yields a single row holding all words (upper-cased)
-- and all ids as two collections. Both are collected with the same ordering
-- (longest word first, then alphabetical, then ROWNUM) so that they are meant
-- to line up index by index.
MERGE INTO temp tgt
USING (
  SELECT s.ROWID AS rid,
         replace_words(
           lists.word_list,
           lists.id_list,
           s.sentence
         ) AS replaced_sentence
  FROM temp s
  CROSS JOIN (
    SELECT CAST(
             COLLECT(
               UPPER( w.word )
               ORDER BY LENGTH( w.word ) DESC, UPPER( w.word ) ASC, ROWNUM
             )
             AS stringlist
           ) AS word_list,
           CAST(
             COLLECT(
               w.id
               ORDER BY LENGTH( w.word ) DESC, UPPER( w.word ) ASC, ROWNUM
             )
             AS intlist
           ) AS id_list
    FROM temp w
  ) lists
) chg
ON ( tgt.ROWID = chg.rid )
WHEN MATCHED THEN
  UPDATE SET tgt.sentence = chg.replaced_sentence;
我将单词(带有 ID)与句子分开存放,并将单词转换为小写,因为无论如何您都需要不区分大小写的搜索。如果我在句子的同一位置找到两个匹配项,我会选择较长的那个。如果匹配项有重叠(例如 'manual testing' 和 'testing strategy'),我总是选择在句子中最先出现的那个单词。
此致,
Stew Ashton
-- SQL*Plus session transcript: "SQL>" is the prompt and the leading numbers
-- are SQL*Plus continuation-line prompts, not part of the statements.
-- Create the demo table with an id, a lookup word and a sentence per row.
SQL> Create table temp(
2 id NUMBER,
3 word VARCHAR2(1000),
4 Sentence VARCHAR2(2000)
5 );
-- Load the sample rows. Some sentences spell words in mixed case
-- (e.g. 'TeStInG', 'tEsting'), and id 2 appears on two rows.
SQL> insert into temp
2 SELECT 1,'automation testing', 'automtestingation TeStInG TEST is popular kind of testing' FROM DUAL UNION ALL
3 SELECT 2,'testing','manual testing' FROM DUAL UNION ALL
4 select 2,'test', 'test' FROM DUAL UNION ALL
5 SELECT 3,'manual testing','this is an old method of testing' FROM DUAL UNION ALL
6 SELECT 4,'punctuation','automation Testing,manual tEsting,punctuation,automanual teSting-tesTing' FROM DUAL UNION ALL
7 SELECT 5,'B-number analysis','B-number analysis table' FROM DUAL UNION ALL
8 SELECT 6,'B-number analysis table','testing B-number analysis' FROM DUAL UNION ALL
9 SELECT 7,'Not Matched','Testing tEsting teSting' FROM DUAL;
-- sentences: a copy of only the sentence column of temp.
SQL> create table sentences as select sentence from temp;
-- words: one row per distinct (length, lower-cased word) pair from temp,
-- carrying the smallest id found for that pair.
SQL> create table words cache as
2 select length(word) word_length,
3 min(id) id,
4 lower(word) word
5 from temp
6 group by length(word), lower(word);
-- Extra test sentence: every word from words, joined with commas.
SQL> insert into sentences
2 select listagg(word, ',') within group(order by word)
3 from words;
-- Extra test sentence with the literal text 'Nothing matches here'.
SQL> insert into sentences values('Nothing matches here');
SQL> commit;
SQL> declare
  2    -- Rewrites sentences.sentence in place: every whole-word, case-insensitive
  3    -- occurrence of a words.word becomes http://localhost/<id>/<u>text</u>,
  4    -- where text is the occurrence exactly as it is spelled in the sentence.
  5    --
  6    -- Only sentences that contain at least one word as a plain substring are
  7    -- selected, and they are selected FOR UPDATE.
  8    cursor cur_sentences is
  9      select rowid rid, sentence from sentences s
 10      where exists (
 11        select null from words
 12        where instr(lower(s.sentence), word) > 0
 13      )
 14      for update;
 15    type tt_sentences is table of cur_sentences%rowtype;
 16    lt_sentences tt_sentences;
 17    lt_sentences_new tt_sentences;
 18
 19    -- Returns p_sentence with each kept match wrapped in its link.
 20    function change_sentence(p_sentence in sentences.sentence%type)
 21    return sentences.sentence%type is
 22      -- Lists the whole-word matches of all words in cp_sentence: the first
 23      -- branch finds each word's first match, the recursive branch searches
 24      -- again from pos+1. Rows are ordered by position and, at the same
 25      -- position, longest word first. new_word is the matched text taken
 26      -- from the sentence, so it keeps the sentence's letter case.
 27      -- NOTE(review): word is concatenated into the pattern unescaped, so a
 28      -- word containing regex metacharacters is interpreted as a pattern.
 29      cursor cur_words(cp_sentence in sentences.sentence%type) is
 30        with recurse (pos, word_length, id, word) as (
 31          select regexp_instr(cp_sentence, '(^|\W)('||word||')(\W|$)', 1, 1, 0, 'i', 2),
 32            word_length, id, word
 33          from words
 34          where regexp_instr(cp_sentence, '(^|\W)('||word||')(\W|$)', 1, 1, 0, 'i', 2) > 0
 35          union all
 36          select regexp_instr(cp_sentence, '(^|\W)('||word||')(\W|$)', pos+1, 1, 0, 'i', 2),
 37            word_length, id, word
 38          from recurse
 39          where regexp_instr(cp_sentence, '(^|\W)('||word||')(\W|$)', pos+1, 1, 0, 'i', 2) > 0
 40        )
 41        select pos, word_length, id, word,
 42          substr(cp_sentence, pos, length(word)) new_word
 43        from recurse
 44        order by pos, word_length desc;
 45      type tt_words is table of cur_words%rowtype;
 46      lt_words tt_words;
 47      lt_words_kept tt_words:= new tt_words();
 48      l_pos number := 0;
 49      l_sentence sentences.sentence%type := p_sentence;
 50    begin
 51      open cur_words(p_sentence);
 52      fetch cur_words bulk collect into lt_words;
 53      close cur_words;
 54      -- Keep a match only if it starts after the end of the previously kept
 55      -- one; this drops shorter and overlapping candidates.
 56      for i in 1..lt_words.count loop
 57        if l_pos < lt_words(i).pos then
 58          l_pos := lt_words(i).pos + lt_words(i).word_length;
 59          lt_words_kept.extend;
 60          lt_words_kept(lt_words_kept.count) := lt_words(i);
 61        end if;
 62      end loop;
 63      -- Splice the links in from right to left so that the positions of the
 64      -- matches still to be processed stay valid. A positional splice is
 65      -- used instead of regexp_replace so that the matched text is never
 66      -- interpreted as a regex pattern or as a replacement backreference.
 67      for i in reverse 1..lt_words_kept.count loop
 68        l_sentence :=
 69          substr(l_sentence, 1, lt_words_kept(i).pos - 1)
 70          ||'http://localhost/'||lt_words_kept(i).id||'/<u>'||lt_words_kept(i).new_word||'</u>'
 71          ||substr(l_sentence, lt_words_kept(i).pos + length(lt_words_kept(i).new_word));
 72      end loop;
 73      return l_sentence;
 74    exception when others then
 75      -- Close only if still open: an unconditional close after the normal
 76      -- close above would raise its own error and hide the original one.
 77      if cur_words%isopen then
 78        close cur_words;
 79      end if;
 80      raise;
 81    end change_sentence;
 82
 83  begin
 84    open cur_sentences;
 85    loop
 86      -- Work in batches of 100 rows.
 87      fetch cur_sentences bulk collect into lt_sentences limit 100;
 88      exit when lt_sentences.count = 0;
 89      lt_sentences_new := new tt_sentences();
 90      lt_sentences_new.extend(lt_sentences.count);
 91      for i in 1..lt_sentences.count loop
 92        lt_sentences_new(i).sentence := change_sentence(lt_sentences(i).sentence);
 93      end loop;
 94      forall i in 1..lt_sentences.count
 95        update sentences set sentence = lt_sentences_new(i).sentence where rowid = lt_sentences(i).rid;
 96      exit when cur_sentences%notfound;
 97    end loop;
 98    close cur_sentences;
 99  exception when others then
100    if cur_sentences%isopen then
101      close cur_sentences;
102    end if;
103    -- Always re-raise; previously the error was swallowed whenever the
104    -- cursor happened not to be open.
105    raise;
106  end;
107  /
PL/SQL procedure successfully completed.
SQL> select * from sentences order by 1;
SENTENCE
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Nothing matches here
automtestingation http://localhost/2/<u>TeStInG</u> http://localhost/2/<u>TEST</u> is popular kind of http://localhost/2/<u>testing</u>
http://localhost/1/<u>automation Testing</u>,http://localhost/3/<u>manual tEsting</u>,http://localhost/4/<u>punctuation</u>,automanual http://localhost/2/<u>teSting</u>-http://localhost/2/<u>tesTing</u>
http://localhost/1/<u>automation testing</u>,http://localhost/5/<u>b-number analysis</u>,http://localhost/6/<u>b-number analysis table</u>,http://localhost/3/<u>manual testing</u>,http://localhost/7/<u>not matched</u>,http://localhost/4/<u>punctuation</u>,http://localhost/2/<u>test</u>,http://localhost/2/<u>testing</u>
http://localhost/2/<u>Testing</u> http://localhost/2/<u>tEsting</u> http://localhost/2/<u>teSting</u>
http://localhost/2/<u>test</u>
http://localhost/2/<u>testing</u> http://localhost/5/<u>B-number analysis</u>
http://localhost/3/<u>manual testing</u>
http://localhost/6/<u>B-number analysis table</u>
this is an old method of http://localhost/2/<u>testing</u>
表 temp 包含 "word" 和 "sentence" 两列。下面的代码检查句子中是否包含 word 列中的任何单词;如果存在,该单词将被替换为 URL(包含该单词本身及其 ID)。该代码在大约 1-10 行数据时可以正常工作,但表中大约有 50k 条记录,此时它会耗尽整个临时表空间(temp space)。我该如何检查并优化这个查询?
要求:有 50k 个单词和句子。如果 word 列中的任何单词出现在句子中,则将句子中的该单词替换为 URL(包含单词及其 id)。查找单词时,搜索必须不区分大小写;同时,在替换为 URL 时需要保留句子中原有的大小写。
-- Demo table: each row holds one lookup word with its id, plus a sentence
-- in which lookup words are later searched for.
CREATE TABLE temp (
    id       NUMBER,
    word     VARCHAR2(1000),
    sentence VARCHAR2(2000)
);
-- Sample rows for temp.
-- The explicit column list keeps the load independent of column order, and the
-- trailing semicolon terminates the statement so that whatever follows in the
-- script is not parsed as part of this INSERT.
-- NOTE(review): id 2 is used for both 'testing' and 'test'; kept exactly as in
-- the original sample data - confirm that this is intentional.
INSERT INTO temp (id, word, sentence)
SELECT 1, 'automation testing', 'automtestingation TeStInG TEST is popular kind of testing' FROM DUAL UNION ALL
SELECT 2, 'testing', 'manual testing' FROM DUAL UNION ALL
SELECT 2, 'test', 'test' FROM DUAL UNION ALL
SELECT 3, 'manual testing', 'this is an old method of testing' FROM DUAL UNION ALL
SELECT 4, 'punctuation', 'automation testing,manual testing,punctuation,automanual testing-testing' FROM DUAL UNION ALL
SELECT 5, 'B-number analysis', 'B-number analysis table' FROM DUAL UNION ALL
SELECT 6, 'B-number analysis table', 'testing B-number analysis' FROM DUAL UNION ALL
SELECT 7, 'Not Matched', 'testing testing testing' FROM DUAL;
SQL 类型:
-- Schema-level nested-table types: one for lists of strings, one for lists of
-- integer ids. Being SQL types, they can be used both in SQL and in PL/SQL.
CREATE TYPE stringlist AS TABLE OF VARCHAR2(4000);
/
CREATE TYPE intlist AS TABLE OF NUMBER(20, 0);
/
PLSQL函数
CREATE FUNCTION replace_words(
  word_list IN stringlist,
  id_list   IN intlist,
  sentence  IN temp.sentence%TYPE
) RETURN temp.sentence%TYPE
IS
  /*
   * Returns SENTENCE with every whole-word occurrence of an entry of
   * WORD_LIST replaced by 'http://localhost/<id>/<u><text></u>', where <id>
   * is the ID_LIST element at the same index and <text> is the matched text
   * exactly as it is spelled in SENTENCE.
   *
   * word_list : words to look for. They are searched in UPPER(sentence), so
   *             they have to be supplied in upper case in order to match.
   *             NULL entries are ignored.
   * id_list   : id_list(i) is the id emitted for word_list(i).
   * sentence  : text to scan. A NULL sentence is returned unchanged.
   *
   * Scanning goes left to right. At each step the candidate with the
   * smallest start position wins; at equal positions the entry that comes
   * first in WORD_LIST wins. Scanning then resumes right after the replaced
   * text, so replacements never overlap.
   *
   * NOTE(review): the result is accumulated in a variable anchored to
   * temp.sentence%TYPE; presumably a sentence that grows beyond that size
   * raises a value error - confirm whether that is acceptable.
   */
  p_sentence       temp.sentence%TYPE := UPPER( sentence );
  p_pos            PLS_INTEGER := 1;  -- first character not yet copied to the output
  p_min_word_index PLS_INTEGER;       -- start of the best match found in this pass
  p_word_index     PLS_INTEGER;       -- search position for the current word
  p_index          PLS_INTEGER;       -- index in word_list of the best match
  o_sentence       temp.sentence%TYPE;
BEGIN
  -- With a NULL sentence (or a NULL word, see below) INSTR returns NULL, which
  -- is never "= 0"; the search would then treat NULL as a match position and
  -- keep appending links. Nothing can match in these cases, so bail out early.
  IF sentence IS NULL OR word_list IS NULL THEN
    RETURN sentence;
  END IF;

  LOOP
    p_min_word_index := NULL;
    p_index := NULL;
    FOR i IN 1 .. word_list.COUNT LOOP
      -- A NULL (empty) word cannot occur in the sentence; skip it.
      CONTINUE WHEN word_list(i) IS NULL;
      p_word_index := p_pos;
      LOOP
        p_word_index := INSTR( p_sentence, word_list(i), p_word_index );
        EXIT WHEN p_word_index = 0;
        -- Reject hits that are glued to a word character on either side
        -- and retry one character further on.
        IF ( p_word_index > 1
            AND REGEXP_LIKE( SUBSTR( p_sentence, p_word_index - 1, 1 ), '\w' )
           )
           OR REGEXP_LIKE( SUBSTR( p_sentence, p_word_index + LENGTH( word_list(i) ), 1 ), '\w' )
        THEN
          p_word_index := p_word_index + 1;
          CONTINUE;
        END IF;
        -- Strict "<" keeps the earlier list entry when two words start at
        -- the same position.
        IF p_min_word_index IS NULL OR p_word_index < p_min_word_index THEN
          p_min_word_index := p_word_index;
          p_index := i;
        END IF;
        EXIT;
      END LOOP;
    END LOOP;
    IF p_index IS NULL THEN
      -- No further match: copy the remainder and finish.
      o_sentence := o_sentence || SUBSTR( sentence, p_pos );
      EXIT;
    ELSE
      -- Copy the text before the match, then the link built around the
      -- original-case text taken from SENTENCE.
      o_sentence := o_sentence
                 || SUBSTR( sentence, p_pos, p_min_word_index - p_pos )
                 || 'http://localhost/'
                 || id_list(p_index)
                 || '/<u>'
                 || SUBSTR( sentence, p_min_word_index, LENGTH( word_list( p_index ) ) )
                 || '</u>';
      p_pos := p_min_word_index + LENGTH( word_list( p_index ) );
    END IF;
  END LOOP;
  RETURN o_sentence;
END;
/
合并
-- Rewrites the sentence of every row of temp with the output of replace_words.
-- The inline view "lists" yields a single row holding all words (upper-cased)
-- and all ids as two collections. Both are collected with the same ordering
-- (longest word first, then alphabetical, then ROWNUM) so that they are meant
-- to line up index by index.
MERGE INTO temp tgt
USING (
  SELECT s.ROWID AS rid,
         replace_words(
           lists.word_list,
           lists.id_list,
           s.sentence
         ) AS replaced_sentence
  FROM temp s
  CROSS JOIN (
    SELECT CAST(
             COLLECT(
               UPPER( w.word )
               ORDER BY LENGTH( w.word ) DESC, UPPER( w.word ) ASC, ROWNUM
             )
             AS stringlist
           ) AS word_list,
           CAST(
             COLLECT(
               w.id
               ORDER BY LENGTH( w.word ) DESC, UPPER( w.word ) ASC, ROWNUM
             )
             AS intlist
           ) AS id_list
    FROM temp w
  ) lists
) chg
ON ( tgt.ROWID = chg.rid )
WHEN MATCHED THEN
  UPDATE SET tgt.sentence = chg.replaced_sentence;
我将单词(带有 ID)与句子分开存放,并将单词转换为小写,因为无论如何您都需要不区分大小写的搜索。如果我在句子的同一位置找到两个匹配项,我会选择较长的那个。如果匹配项有重叠(例如 'manual testing' 和 'testing strategy'),我总是选择在句子中最先出现的那个单词。
此致, Stew Ashton
-- SQL*Plus session transcript: "SQL>" is the prompt and the leading numbers
-- are SQL*Plus continuation-line prompts, not part of the statements.
-- Create the demo table with an id, a lookup word and a sentence per row.
SQL> Create table temp(
2 id NUMBER,
3 word VARCHAR2(1000),
4 Sentence VARCHAR2(2000)
5 );
-- Load the sample rows. Some sentences spell words in mixed case
-- (e.g. 'TeStInG', 'tEsting'), and id 2 appears on two rows.
SQL> insert into temp
2 SELECT 1,'automation testing', 'automtestingation TeStInG TEST is popular kind of testing' FROM DUAL UNION ALL
3 SELECT 2,'testing','manual testing' FROM DUAL UNION ALL
4 select 2,'test', 'test' FROM DUAL UNION ALL
5 SELECT 3,'manual testing','this is an old method of testing' FROM DUAL UNION ALL
6 SELECT 4,'punctuation','automation Testing,manual tEsting,punctuation,automanual teSting-tesTing' FROM DUAL UNION ALL
7 SELECT 5,'B-number analysis','B-number analysis table' FROM DUAL UNION ALL
8 SELECT 6,'B-number analysis table','testing B-number analysis' FROM DUAL UNION ALL
9 SELECT 7,'Not Matched','Testing tEsting teSting' FROM DUAL;
-- sentences: a copy of only the sentence column of temp.
SQL> create table sentences as select sentence from temp;
-- words: one row per distinct (length, lower-cased word) pair from temp,
-- carrying the smallest id found for that pair.
SQL> create table words cache as
2 select length(word) word_length,
3 min(id) id,
4 lower(word) word
5 from temp
6 group by length(word), lower(word);
-- Extra test sentence: every word from words, joined with commas.
SQL> insert into sentences
2 select listagg(word, ',') within group(order by word)
3 from words;
-- Extra test sentence with the literal text 'Nothing matches here'.
SQL> insert into sentences values('Nothing matches here');
SQL> commit;
SQL> declare
  2    -- Rewrites sentences.sentence in place: every whole-word, case-insensitive
  3    -- occurrence of a words.word becomes http://localhost/<id>/<u>text</u>,
  4    -- where text is the occurrence exactly as it is spelled in the sentence.
  5    --
  6    -- Only sentences that contain at least one word as a plain substring are
  7    -- selected, and they are selected FOR UPDATE.
  8    cursor cur_sentences is
  9      select rowid rid, sentence from sentences s
 10      where exists (
 11        select null from words
 12        where instr(lower(s.sentence), word) > 0
 13      )
 14      for update;
 15    type tt_sentences is table of cur_sentences%rowtype;
 16    lt_sentences tt_sentences;
 17    lt_sentences_new tt_sentences;
 18
 19    -- Returns p_sentence with each kept match wrapped in its link.
 20    function change_sentence(p_sentence in sentences.sentence%type)
 21    return sentences.sentence%type is
 22      -- Lists the whole-word matches of all words in cp_sentence: the first
 23      -- branch finds each word's first match, the recursive branch searches
 24      -- again from pos+1. Rows are ordered by position and, at the same
 25      -- position, longest word first. new_word is the matched text taken
 26      -- from the sentence, so it keeps the sentence's letter case.
 27      -- NOTE(review): word is concatenated into the pattern unescaped, so a
 28      -- word containing regex metacharacters is interpreted as a pattern.
 29      cursor cur_words(cp_sentence in sentences.sentence%type) is
 30        with recurse (pos, word_length, id, word) as (
 31          select regexp_instr(cp_sentence, '(^|\W)('||word||')(\W|$)', 1, 1, 0, 'i', 2),
 32            word_length, id, word
 33          from words
 34          where regexp_instr(cp_sentence, '(^|\W)('||word||')(\W|$)', 1, 1, 0, 'i', 2) > 0
 35          union all
 36          select regexp_instr(cp_sentence, '(^|\W)('||word||')(\W|$)', pos+1, 1, 0, 'i', 2),
 37            word_length, id, word
 38          from recurse
 39          where regexp_instr(cp_sentence, '(^|\W)('||word||')(\W|$)', pos+1, 1, 0, 'i', 2) > 0
 40        )
 41        select pos, word_length, id, word,
 42          substr(cp_sentence, pos, length(word)) new_word
 43        from recurse
 44        order by pos, word_length desc;
 45      type tt_words is table of cur_words%rowtype;
 46      lt_words tt_words;
 47      lt_words_kept tt_words:= new tt_words();
 48      l_pos number := 0;
 49      l_sentence sentences.sentence%type := p_sentence;
 50    begin
 51      open cur_words(p_sentence);
 52      fetch cur_words bulk collect into lt_words;
 53      close cur_words;
 54      -- Keep a match only if it starts after the end of the previously kept
 55      -- one; this drops shorter and overlapping candidates.
 56      for i in 1..lt_words.count loop
 57        if l_pos < lt_words(i).pos then
 58          l_pos := lt_words(i).pos + lt_words(i).word_length;
 59          lt_words_kept.extend;
 60          lt_words_kept(lt_words_kept.count) := lt_words(i);
 61        end if;
 62      end loop;
 63      -- Splice the links in from right to left so that the positions of the
 64      -- matches still to be processed stay valid. A positional splice is
 65      -- used instead of regexp_replace so that the matched text is never
 66      -- interpreted as a regex pattern or as a replacement backreference.
 67      for i in reverse 1..lt_words_kept.count loop
 68        l_sentence :=
 69          substr(l_sentence, 1, lt_words_kept(i).pos - 1)
 70          ||'http://localhost/'||lt_words_kept(i).id||'/<u>'||lt_words_kept(i).new_word||'</u>'
 71          ||substr(l_sentence, lt_words_kept(i).pos + length(lt_words_kept(i).new_word));
 72      end loop;
 73      return l_sentence;
 74    exception when others then
 75      -- Close only if still open: an unconditional close after the normal
 76      -- close above would raise its own error and hide the original one.
 77      if cur_words%isopen then
 78        close cur_words;
 79      end if;
 80      raise;
 81    end change_sentence;
 82
 83  begin
 84    open cur_sentences;
 85    loop
 86      -- Work in batches of 100 rows.
 87      fetch cur_sentences bulk collect into lt_sentences limit 100;
 88      exit when lt_sentences.count = 0;
 89      lt_sentences_new := new tt_sentences();
 90      lt_sentences_new.extend(lt_sentences.count);
 91      for i in 1..lt_sentences.count loop
 92        lt_sentences_new(i).sentence := change_sentence(lt_sentences(i).sentence);
 93      end loop;
 94      forall i in 1..lt_sentences.count
 95        update sentences set sentence = lt_sentences_new(i).sentence where rowid = lt_sentences(i).rid;
 96      exit when cur_sentences%notfound;
 97    end loop;
 98    close cur_sentences;
 99  exception when others then
100    if cur_sentences%isopen then
101      close cur_sentences;
102    end if;
103    -- Always re-raise; previously the error was swallowed whenever the
104    -- cursor happened not to be open.
105    raise;
106  end;
107  /
PL/SQL procedure successfully completed.
SQL> select * from sentences order by 1;
SENTENCE
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Nothing matches here
automtestingation http://localhost/2/<u>TeStInG</u> http://localhost/2/<u>TEST</u> is popular kind of http://localhost/2/<u>testing</u>
http://localhost/1/<u>automation Testing</u>,http://localhost/3/<u>manual tEsting</u>,http://localhost/4/<u>punctuation</u>,automanual http://localhost/2/<u>teSting</u>-http://localhost/2/<u>tesTing</u>
http://localhost/1/<u>automation testing</u>,http://localhost/5/<u>b-number analysis</u>,http://localhost/6/<u>b-number analysis table</u>,http://localhost/3/<u>manual testing</u>,http://localhost/7/<u>not matched</u>,http://localhost/4/<u>punctuation</u>,http://localhost/2/<u>test</u>,http://localhost/2/<u>testing</u>
http://localhost/2/<u>Testing</u> http://localhost/2/<u>tEsting</u> http://localhost/2/<u>teSting</u>
http://localhost/2/<u>test</u>
http://localhost/2/<u>testing</u> http://localhost/5/<u>B-number analysis</u>
http://localhost/3/<u>manual testing</u>
http://localhost/6/<u>B-number analysis table</u>
this is an old method of http://localhost/2/<u>testing</u>