如何在Oracle中保留文本中的相同大小写?
How to retain the same case in the text in Oracle?
我有一个包含单词和句子列的 table。这个想法是,如果在单词列中找到单词,则用 link(包括单词本身)替换句子中的单词。下面的查询完全替换了,但是由于 link 是从 temp.word 列构造的,所以句子中单词的大小写更改为 word 列中单词的大小写。有没有办法在句子本身保留相同的大小写?
Create table temp(
id NUMBER,
word VARCHAR2(1000),
sentence VARCHAR2(2000)
);
insert into temp
SELECT 1,'automation testing', 'automtestingation Testing is popular kind of testing' FROM DUAL UNION ALL
SELECT 2,'testing','manual testing' FROM DUAL UNION ALL
SELECT 3,'manual testing','this is an old method of testing' FROM DUAL UNION ALL
SELECT 4,'punctuation','automation testing,manual testing,punctuation,automanual testing-testing' FROM DUAL UNION ALL
SELECT 5,'B-number analysis','B-number analysis table' FROM DUAL UNION ALL
SELECT 6,'B-number analysis table','testing B-number analysis' FROM DUAL UNION ALL
SELECT 7,'Not Matched','testing testing testing' FROM DUAL;
with words(id, word, word_length, search1, replace1, search2, replace2) as (
select id, word, length(word),
'(^|\W)' || REGEXP_REPLACE(word, '([][)(}{|^$\.*+?])', '\') || '($|\W)',
'{'|| id ||'}',
'{'|| id ||'}',
'http://localhost/' || id || '/<u>' || word || '</u>'
FROM temp
)
, joined_data as (
select w.search1, w.replace1, w.search2, w.replace2,
s.rowid s_rid, s.sentence,
row_number() over(partition by s.rowid order by word_length desc) rn
from words w
join temp s
on instr(UPPER(s.sentence), UPPER(w.word)) > 0
and regexp_like(s.sentence, w.search1)
)
, unpivoted_data as (
select S_RID, SENTENCE, PHASE, SEARCH_STRING, REPLACE_STRING,
row_number() over(partition by s_rid order by phase, rn) rn,
case when row_number() over(partition by s_rid order by phase, rn)
= count(*) over(partition by s_rid)
then 1
else 0
end is_last
from joined_data
unpivot(
(search_string, replace_string)
for phase in ( (search1, replace1) as 1, (search2, replace2) as 2 ))
)
, replaced_data(S_RID, RN, is_last, SENTENCE) as (
select S_RID, RN, is_last,
regexp_replace(SENTENCE, search_string, replace_string,1,0,'i')
from unpivoted_data
where rn = 1
union all
select n.S_RID, n.RN, n.is_last,
case when n.phase = 1
then regexp_replace(o.SENTENCE, n.search_string, n.replace_string,1,0,'i')
else replace(o.SENTENCE, n.search_string, n.replace_string)
end
from unpivoted_data n
join replaced_data o
on o.s_rid = n.s_rid and n.rn = o.rn + 1
)
select s_rid, sentence from replaced_data
where is_last = 1
order by s_rid;
For example, for id = 1, the sentence is automtestingation Testing is popular kind of testing
After replacement it will be automtestingation http://localhost/2/<u>testing</u> is popular kind of http://localhost/2/<u>testing</u>.
The word Testing is replaced with testing(from the temp.word column).
预期结果是
automtestingation http://localhost/2/<u>Testing</u> is popular kind of http://localhost/2/<u>testing</u>
虽然肯定有一种方法可以在单个 SQL 语句中完成此操作,但我认为使用单独的函数可以更好地解决此问题:
create or replace function replace_words(p_word varchar2, p_sentence varchar2) return varchar2 is
v_match_position number;
v_match_count number := 0;
v_new_sentence varchar2(4000) := p_sentence;
begin
--Find all matches.
loop
--Find Nth case-insensitive match
v_match_count := v_match_count + 1;
v_match_position := regexp_instr(
srcstr => v_new_sentence,
pattern => p_word,
occurrence => v_match_count,
modifier => 'i');
exit when v_match_position = 0;
--Insert text, instead of replacing, to keep the original case.
v_new_sentence :=
substr(v_new_sentence, 1, v_match_position - 1) || 'http://localhost/2/<u>'||
substr(v_new_sentence, v_match_position, length(p_word)) || '</u>'||
substr(v_new_sentence, v_match_position + length(p_word));
end loop;
return v_new_sentence;
end;
/
然后 SQL 查询如下所示:
select id, word, sentence, replace_words(word, sentence) from temp;
Oracle 设置:
Create table temp(
id NUMBER,
word VARCHAR2(1000),
Sentence VARCHAR2(2000)
);
insert into temp
SELECT 1,'automation testing', 'automtestingation TeStInG TEST is popular kind of testing' FROM DUAL UNION ALL
SELECT 2,'testing','manual testing' FROM DUAL UNION ALL
select 2,'test', 'test' FROM DUAL UNION ALL
SELECT 3,'manual testing','this is an old method of testing' FROM DUAL UNION ALL
SELECT 4,'punctuation','automation testing,manual testing,punctuation,automanual testing-testing' FROM DUAL UNION ALL
SELECT 5,'B-number analysis','B-number analysis table' FROM DUAL UNION ALL
SELECT 6,'B-number analysis table','testing B-number analysis' FROM DUAL UNION ALL
SELECT 7,'Not Matched','testing testing testing' FROM DUAL UNION ALL
SELECT 8,'^[($','testing characters ^[($ that need escaping in a regular expression' FROM DUAL;
SQL 类型:
CREATE TYPE stringlist IS TABLE OF VARCHAR2(4000);
/
CREATE TYPE intlist IS TABLE OF NUMBER(20,0);
/
PL/SQL函数:
CREATE FUNCTION replace_words(
word_list IN stringlist,
id_list IN intlist,
sentence IN temp.sentence%TYPE
) RETURN temp.sentence%TYPE
IS
p_sentence temp.sentence%TYPE := UPPER( sentence );
p_pos PLS_INTEGER := 1;
p_min_word_index PLS_INTEGER;
p_word_index PLS_INTEGER;
p_start PLS_INTEGER;
p_index PLS_INTEGER;
o_sentence temp.sentence%TYPE;
BEGIN
LOOP
p_min_word_index := NULL;
p_index := NULL;
FOR i IN 1 .. word_list.COUNT LOOP
p_word_index := p_pos;
LOOP
p_word_index := INSTR( p_sentence, word_list(i), p_word_index );
EXIT WHEN p_word_index = 0;
IF ( p_word_index > 1
AND REGEXP_LIKE( SUBSTR( p_sentence, p_word_index - 1, 1 ), '\w' )
)
OR REGEXP_LIKE( SUBSTR( p_sentence, p_word_index + LENGTH( word_list(i) ), 1 ), '\w' )
THEN
p_word_index := p_word_index + 1;
CONTINUE;
END IF;
IF p_min_word_index IS NULL OR p_word_index < p_min_word_index THEN
p_min_word_index := p_word_index;
p_index := i;
END IF;
EXIT;
END LOOP;
END LOOP;
IF p_index IS NULL THEN
o_sentence := o_sentence || SUBSTR( sentence, p_pos );
EXIT;
ELSE
o_sentence := o_sentence
|| SUBSTR( sentence, p_pos, p_min_word_index - p_pos )
|| 'http://localhost/'
|| id_list(p_index)
|| '/<u>'
|| SUBSTR( sentence, p_min_word_index, LENGTH( word_list( p_index ) ) )
|| '</u>';
p_pos := p_min_word_index + LENGTH( word_list( p_index ) );
END IF;
END LOOP;
RETURN o_sentence;
END;
/
合并:
MERGE INTO temp dst
USING (
WITH lists ( word_list, id_list ) AS (
SELECT CAST(
COLLECT(
UPPER( word )
ORDER BY LENGTH( word ) DESC, UPPER( word ) ASC, ROWNUM
)
AS stringlist
),
CAST(
COLLECT(
id
ORDER BY LENGTH( word ) DESC, UPPER( word ) ASC, ROWNUM
)
AS intlist
)
FROM temp
)
SELECT t.ROWID rid,
replace_words(
word_list,
id_list,
sentence
) AS replaced_sentence
FROM temp t
CROSS JOIN lists
) src
ON ( dst.ROWID = src.RID )
WHEN MATCHED THEN
UPDATE SET sentence = src.replaced_sentence;
输出:
SELECT * FROM temp;
ID | WORD | SENTENCE
-: | :---------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1 | automation testing | automtestingation http://localhost/2/<u>TeStInG</u> http://localhost/2/<u>TEST</u> is popular kind of http://localhost/2/<u>testing</u>
2 | testing | http://localhost/3/<u>manual testing</u>
2 | test | http://localhost/2/<u>test</u>
3 | manual testing | this is an old method of http://localhost/2/<u>testing</u>
4 | punctuation | http://localhost/1/<u>automation testing</u>,http://localhost/3/<u>manual testing</u>,http://localhost/4/<u>punctuation</u>,automanual http://localhost/2/<u>testing</u>-http://localhost/2/<u>testing</u>
5 | B-number analysis | http://localhost/6/<u>B-number analysis table</u>
6 | B-number analysis table | http://localhost/2/<u>testing</u> http://localhost/5/<u>B-number analysis</u>
7 | Not Matched | http://localhost/2/<u>testing</u> http://localhost/2/<u>testing</u> http://localhost/2/<u>testing</u>
8 | ^[($ | http://localhost/2/<u>testing</u> characters http://localhost/8/<u>^[($</u> that need escaping in a regular expression
db<>fiddle here
我有一个包含单词和句子列的 table。这个想法是,如果在单词列中找到单词,则用 link(包括单词本身)替换句子中的单词。下面的查询完全替换了,但是由于 link 是从 temp.word 列构造的,所以句子中单词的大小写更改为 word 列中单词的大小写。有没有办法在句子本身保留相同的大小写?
Create table temp(
id NUMBER,
word VARCHAR2(1000),
sentence VARCHAR2(2000)
);
insert into temp
SELECT 1,'automation testing', 'automtestingation Testing is popular kind of testing' FROM DUAL UNION ALL
SELECT 2,'testing','manual testing' FROM DUAL UNION ALL
SELECT 3,'manual testing','this is an old method of testing' FROM DUAL UNION ALL
SELECT 4,'punctuation','automation testing,manual testing,punctuation,automanual testing-testing' FROM DUAL UNION ALL
SELECT 5,'B-number analysis','B-number analysis table' FROM DUAL UNION ALL
SELECT 6,'B-number analysis table','testing B-number analysis' FROM DUAL UNION ALL
SELECT 7,'Not Matched','testing testing testing' FROM DUAL;
with words(id, word, word_length, search1, replace1, search2, replace2) as (
select id, word, length(word),
'(^|\W)' || REGEXP_REPLACE(word, '([][)(}{|^$\.*+?])', '\') || '($|\W)',
'{'|| id ||'}',
'{'|| id ||'}',
'http://localhost/' || id || '/<u>' || word || '</u>'
FROM temp
)
, joined_data as (
select w.search1, w.replace1, w.search2, w.replace2,
s.rowid s_rid, s.sentence,
row_number() over(partition by s.rowid order by word_length desc) rn
from words w
join temp s
on instr(UPPER(s.sentence), UPPER(w.word)) > 0
and regexp_like(s.sentence, w.search1)
)
, unpivoted_data as (
select S_RID, SENTENCE, PHASE, SEARCH_STRING, REPLACE_STRING,
row_number() over(partition by s_rid order by phase, rn) rn,
case when row_number() over(partition by s_rid order by phase, rn)
= count(*) over(partition by s_rid)
then 1
else 0
end is_last
from joined_data
unpivot(
(search_string, replace_string)
for phase in ( (search1, replace1) as 1, (search2, replace2) as 2 ))
)
, replaced_data(S_RID, RN, is_last, SENTENCE) as (
select S_RID, RN, is_last,
regexp_replace(SENTENCE, search_string, replace_string,1,0,'i')
from unpivoted_data
where rn = 1
union all
select n.S_RID, n.RN, n.is_last,
case when n.phase = 1
then regexp_replace(o.SENTENCE, n.search_string, n.replace_string,1,0,'i')
else replace(o.SENTENCE, n.search_string, n.replace_string)
end
from unpivoted_data n
join replaced_data o
on o.s_rid = n.s_rid and n.rn = o.rn + 1
)
select s_rid, sentence from replaced_data
where is_last = 1
order by s_rid;
For example, for id = 1, the sentence is automtestingation Testing is popular kind of testing
After replacement it will be automtestingation http://localhost/2/<u>testing</u> is popular kind of http://localhost/2/<u>testing</u>.
The word Testing is replaced with testing(from the temp.word column).
预期结果是
automtestingation http://localhost/2/<u>Testing</u> is popular kind of http://localhost/2/<u>testing</u>
虽然肯定有一种方法可以在单个 SQL 语句中完成此操作,但我认为使用单独的函数可以更好地解决此问题:
create or replace function replace_words(p_word varchar2, p_sentence varchar2) return varchar2 is
v_match_position number;
v_match_count number := 0;
v_new_sentence varchar2(4000) := p_sentence;
begin
--Find all matches.
loop
--Find Nth case-insensitive match
v_match_count := v_match_count + 1;
v_match_position := regexp_instr(
srcstr => v_new_sentence,
pattern => p_word,
occurrence => v_match_count,
modifier => 'i');
exit when v_match_position = 0;
--Insert text, instead of replacing, to keep the original case.
v_new_sentence :=
substr(v_new_sentence, 1, v_match_position - 1) || 'http://localhost/2/<u>'||
substr(v_new_sentence, v_match_position, length(p_word)) || '</u>'||
substr(v_new_sentence, v_match_position + length(p_word));
end loop;
return v_new_sentence;
end;
/
然后 SQL 查询如下所示:
select id, word, sentence, replace_words(word, sentence) from temp;
Oracle 设置:
Create table temp(
id NUMBER,
word VARCHAR2(1000),
Sentence VARCHAR2(2000)
);
insert into temp
SELECT 1,'automation testing', 'automtestingation TeStInG TEST is popular kind of testing' FROM DUAL UNION ALL
SELECT 2,'testing','manual testing' FROM DUAL UNION ALL
select 2,'test', 'test' FROM DUAL UNION ALL
SELECT 3,'manual testing','this is an old method of testing' FROM DUAL UNION ALL
SELECT 4,'punctuation','automation testing,manual testing,punctuation,automanual testing-testing' FROM DUAL UNION ALL
SELECT 5,'B-number analysis','B-number analysis table' FROM DUAL UNION ALL
SELECT 6,'B-number analysis table','testing B-number analysis' FROM DUAL UNION ALL
SELECT 7,'Not Matched','testing testing testing' FROM DUAL UNION ALL
SELECT 8,'^[($','testing characters ^[($ that need escaping in a regular expression' FROM DUAL;
SQL 类型:
CREATE TYPE stringlist IS TABLE OF VARCHAR2(4000);
/
CREATE TYPE intlist IS TABLE OF NUMBER(20,0);
/
PL/SQL函数:
CREATE FUNCTION replace_words(
word_list IN stringlist,
id_list IN intlist,
sentence IN temp.sentence%TYPE
) RETURN temp.sentence%TYPE
IS
p_sentence temp.sentence%TYPE := UPPER( sentence );
p_pos PLS_INTEGER := 1;
p_min_word_index PLS_INTEGER;
p_word_index PLS_INTEGER;
p_start PLS_INTEGER;
p_index PLS_INTEGER;
o_sentence temp.sentence%TYPE;
BEGIN
LOOP
p_min_word_index := NULL;
p_index := NULL;
FOR i IN 1 .. word_list.COUNT LOOP
p_word_index := p_pos;
LOOP
p_word_index := INSTR( p_sentence, word_list(i), p_word_index );
EXIT WHEN p_word_index = 0;
IF ( p_word_index > 1
AND REGEXP_LIKE( SUBSTR( p_sentence, p_word_index - 1, 1 ), '\w' )
)
OR REGEXP_LIKE( SUBSTR( p_sentence, p_word_index + LENGTH( word_list(i) ), 1 ), '\w' )
THEN
p_word_index := p_word_index + 1;
CONTINUE;
END IF;
IF p_min_word_index IS NULL OR p_word_index < p_min_word_index THEN
p_min_word_index := p_word_index;
p_index := i;
END IF;
EXIT;
END LOOP;
END LOOP;
IF p_index IS NULL THEN
o_sentence := o_sentence || SUBSTR( sentence, p_pos );
EXIT;
ELSE
o_sentence := o_sentence
|| SUBSTR( sentence, p_pos, p_min_word_index - p_pos )
|| 'http://localhost/'
|| id_list(p_index)
|| '/<u>'
|| SUBSTR( sentence, p_min_word_index, LENGTH( word_list( p_index ) ) )
|| '</u>';
p_pos := p_min_word_index + LENGTH( word_list( p_index ) );
END IF;
END LOOP;
RETURN o_sentence;
END;
/
合并:
MERGE INTO temp dst
USING (
WITH lists ( word_list, id_list ) AS (
SELECT CAST(
COLLECT(
UPPER( word )
ORDER BY LENGTH( word ) DESC, UPPER( word ) ASC, ROWNUM
)
AS stringlist
),
CAST(
COLLECT(
id
ORDER BY LENGTH( word ) DESC, UPPER( word ) ASC, ROWNUM
)
AS intlist
)
FROM temp
)
SELECT t.ROWID rid,
replace_words(
word_list,
id_list,
sentence
) AS replaced_sentence
FROM temp t
CROSS JOIN lists
) src
ON ( dst.ROWID = src.RID )
WHEN MATCHED THEN
UPDATE SET sentence = src.replaced_sentence;
输出:
SELECT * FROM temp;
ID | WORD | SENTENCE -: | :---------------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 1 | automation testing | automtestingation http://localhost/2/<u>TeStInG</u> http://localhost/2/<u>TEST</u> is popular kind of http://localhost/2/<u>testing</u> 2 | testing | http://localhost/3/<u>manual testing</u> 2 | test | http://localhost/2/<u>test</u> 3 | manual testing | this is an old method of http://localhost/2/<u>testing</u> 4 | punctuation | http://localhost/1/<u>automation testing</u>,http://localhost/3/<u>manual testing</u>,http://localhost/4/<u>punctuation</u>,automanual http://localhost/2/<u>testing</u>-http://localhost/2/<u>testing</u> 5 | B-number analysis | http://localhost/6/<u>B-number analysis table</u> 6 | B-number analysis table | http://localhost/2/<u>testing</u> http://localhost/5/<u>B-number analysis</u> 7 | Not Matched | http://localhost/2/<u>testing</u> http://localhost/2/<u>testing</u> http://localhost/2/<u>testing</u> 8 | ^[($ | http://localhost/2/<u>testing</u> characters http://localhost/8/<u>^[($</u> that need escaping in a regular expression
db<>fiddle here