为什么 Oracle SQL 中的这个正则表达式丢失了字符串的开头?
Why this regexp in Oracle SQL loses the beginning of string?
我正在使用 p_delimeter 拆分字符串 p_value,分隔符可能由一个或多个字符组成(这就是为什么这里的正则表达式不是常见的 [^,]+ 写法)。
在大多数情况下,以下查询都能按预期工作,但下面这种情况让我感到困惑:
- 字符串 p_value 包含换行符 chr(10),
- p_value 不包含 p_delimeter 作为子字符串,
所以我希望结果是包含整个 p_value 的一行,但实际只得到了换行符之后的剩余部分。
这里我假设正则表达式会把换行符当作普通字符处理,因为在 regexp_substr 的调用中没有使用 'm' 修饰符。
请解释此行为是否正确以及如何获得预期结果。
-- Reproduction query from the question: splits p_value on p_delimeter by
-- returning capture group 1 of the LEVEL-th match of '(.*?)(<delimiter>|$)'.
-- The match parameter is only 'c' (case-sensitive). Without the 'n' flag the
-- '.' metacharacter does not match the newline, so for 'ab' || chr(10) || 'cd'
-- the first match can only begin after the line break and the query yields
-- 'cd' instead of the whole string.
WITH
params AS (SELECT 'ab' || chr(10) || 'cd' p_value,
'xxx' p_delimeter
FROM dual
)
SELECT regexp_substr(p_value, '(.*?)(' || p_delimeter || '|$)', 1, level, 'c', 1) AS CUT
FROM params
-- Row generator: keep producing levels while the LEVEL-th token is non-NULL.
CONNECT BY regexp_substr(p_value, '(.*?)(' || p_delimeter || '|$)', 1, level, 'c', 1) IS NOT NULL;
Actual result: Expected result:
----- ------
CUT CUT
----- ------
cd ab/cd
^
'this is just a marker for a line break [= chr(10)]'
这种行为是符合预期的:默认情况下 . 不匹配换行符,因此匹配只能从换行符之后开始。给正则表达式加上 n 标志,即可让 . 匹配包括换行符在内的所有字符:
-- Split p_value on the text in p_delimiter, returning one row per token.
-- Match parameter 'cn': 'c' = case-sensitive matching, 'n' = let '.' match the
-- newline character as well, so a token may span line breaks.
-- NOTE: p_delimiter is concatenated into the pattern unescaped, so any regex
-- metacharacters it contains are interpreted as regex syntax.
WITH params ( p_value, p_delimiter ) AS (
  SELECT 'ab' || chr(10) || 'cd', 'xxx' FROM dual
)
-- The column is declared as p_delimiter in the CTE, so it must be referenced
-- under exactly that name (the misspelt p_delimeter raises ORA-00904).
SELECT REGEXP_SUBSTR( p_value, '(.*?)(' || p_delimiter || '|$)', 1, LEVEL, 'cn', 1 ) AS CUT
FROM   params
-- REGEXP_COUNT receives the same 'cn' flags as REGEXP_SUBSTR so that both
-- functions see the same sequence of matches. "<" rather than "<=" presumably
-- skips the trailing empty match the pattern produces at end of string.
CONNECT BY LEVEL < REGEXP_COUNT( p_value, '(.*?)(' || p_delimiter || '|$)', 1, 'cn' );
或者您可以使用一个简单的函数:
Oracle 设置:
-- Schema-level nested-table type whose elements are VARCHAR2(4000) strings.
-- It is the return type of split_String, which lets the function result be
-- queried through TABLE( ... ).
CREATE TYPE VARCHAR2_TABLE AS TABLE OF VARCHAR2(4000);
/
CREATE OR REPLACE FUNCTION split_String(
  i_str    IN  VARCHAR2,
  i_delim  IN  VARCHAR2 DEFAULT ','
) RETURN VARCHAR2_TABLE DETERMINISTIC
IS
  -- Splits i_str into the pieces separated by the literal text i_delim.
  --
  -- Parameters:
  --   i_str   - string to split
  --   i_delim - delimiter text (one or more characters), matched literally
  --             with INSTR; defaults to ','
  -- Returns:
  --   VARCHAR2_TABLE holding the pieces in order of appearance. An empty/NULL
  --   i_str gives an empty collection. A string without the delimiter gives a
  --   single element containing the whole string. Adjacent or trailing
  --   delimiters produce NULL elements.
  l_tokens     VARCHAR2_TABLE       := VARCHAR2_TABLE();
  l_str_len    CONSTANT PLS_INTEGER := LENGTH( i_str );
  l_delim_len  CONSTANT PLS_INTEGER := LENGTH( i_delim );
  l_from       PLS_INTEGER          := 1;   -- start of the current piece
  l_hit        PLS_INTEGER;                 -- position of the next delimiter
BEGIN
  -- Nothing to split: hand back the empty collection.
  IF l_str_len IS NULL OR l_str_len <= 0 THEN
    RETURN l_tokens;
  END IF;

  -- Emit one piece for every delimiter occurrence found.
  LOOP
    l_hit := INSTR( i_str, i_delim, l_from );
    -- INSTR gives 0 when no further delimiter exists and NULL when the
    -- delimiter itself is NULL; both end the scan.
    EXIT WHEN l_hit IS NULL OR l_hit <= 0;

    l_tokens.EXTEND;
    l_tokens( l_tokens.LAST ) := SUBSTR( i_str, l_from, l_hit - l_from );
    l_from := l_hit + l_delim_len;
  END LOOP;

  -- Remainder after the last delimiter (or the whole string if none was found).
  IF l_from <= l_str_len + 1 THEN
    l_tokens.EXTEND;
    l_tokens( l_tokens.LAST ) := SUBSTR( i_str, l_from, l_str_len - l_from + 1 );
  END IF;

  RETURN l_tokens;
END;
/
查询:
-- Example call: expand the collection returned by split_String into rows,
-- one row per piece of p_value.
WITH params ( p_value, p_delimiter ) AS (
  SELECT 'ab' || chr(10) || 'cd',
         'xxx'
  FROM   dual
)
SELECT t.COLUMN_VALUE AS CUT
FROM   params p
       CROSS JOIN TABLE( split_String( p.p_value, p.p_delimiter ) ) t;
我正在使用 p_delimeter 拆分字符串 p_value,分隔符可能由一个或多个字符组成(这就是为什么这里的正则表达式不是常见的 [^,]+ 写法)。
在大多数情况下,以下查询都能按预期工作,但下面这种情况让我感到困惑:
- 字符串 p_value 包含换行符 chr(10),
- p_value 不包含 p_delimeter 作为子字符串,
所以我希望结果是包含整个 p_value 的一行,但实际只得到了换行符之后的剩余部分。
这里我假设正则表达式会把换行符当作普通字符处理,因为在 regexp_substr 的调用中没有使用 'm' 修饰符。
请解释此行为是否正确以及如何获得预期结果。
-- Reproduction query from the question: splits p_value on p_delimeter by
-- returning capture group 1 of the LEVEL-th match of '(.*?)(<delimiter>|$)'.
-- The match parameter is only 'c' (case-sensitive). Without the 'n' flag the
-- '.' metacharacter does not match the newline, so for 'ab' || chr(10) || 'cd'
-- the first match can only begin after the line break and the query yields
-- 'cd' instead of the whole string.
WITH
params AS (SELECT 'ab' || chr(10) || 'cd' p_value,
'xxx' p_delimeter
FROM dual
)
SELECT regexp_substr(p_value, '(.*?)(' || p_delimeter || '|$)', 1, level, 'c', 1) AS CUT
FROM params
-- Row generator: keep producing levels while the LEVEL-th token is non-NULL.
CONNECT BY regexp_substr(p_value, '(.*?)(' || p_delimeter || '|$)', 1, level, 'c', 1) IS NOT NULL;
Actual result: Expected result:
----- ------
CUT CUT
----- ------
cd ab/cd
^
'this is just a marker for a line break [= chr(10)]'
这种行为是符合预期的:默认情况下 . 不匹配换行符,因此匹配只能从换行符之后开始。给正则表达式加上 n 标志,即可让 . 匹配包括换行符在内的所有字符:
-- Split p_value on the text in p_delimiter, returning one row per token.
-- Match parameter 'cn': 'c' = case-sensitive matching, 'n' = let '.' match the
-- newline character as well, so a token may span line breaks.
-- NOTE: p_delimiter is concatenated into the pattern unescaped, so any regex
-- metacharacters it contains are interpreted as regex syntax.
WITH params ( p_value, p_delimiter ) AS (
  SELECT 'ab' || chr(10) || 'cd', 'xxx' FROM dual
)
-- The column is declared as p_delimiter in the CTE, so it must be referenced
-- under exactly that name (the misspelt p_delimeter raises ORA-00904).
SELECT REGEXP_SUBSTR( p_value, '(.*?)(' || p_delimiter || '|$)', 1, LEVEL, 'cn', 1 ) AS CUT
FROM   params
-- REGEXP_COUNT receives the same 'cn' flags as REGEXP_SUBSTR so that both
-- functions see the same sequence of matches. "<" rather than "<=" presumably
-- skips the trailing empty match the pattern produces at end of string.
CONNECT BY LEVEL < REGEXP_COUNT( p_value, '(.*?)(' || p_delimiter || '|$)', 1, 'cn' );
或者您可以使用一个简单的函数:
Oracle 设置:
-- Schema-level nested-table type whose elements are VARCHAR2(4000) strings.
-- It is the return type of split_String, which lets the function result be
-- queried through TABLE( ... ).
CREATE TYPE VARCHAR2_TABLE AS TABLE OF VARCHAR2(4000);
/
CREATE OR REPLACE FUNCTION split_String(
  i_str    IN  VARCHAR2,
  i_delim  IN  VARCHAR2 DEFAULT ','
) RETURN VARCHAR2_TABLE DETERMINISTIC
IS
  -- Splits i_str into the pieces separated by the literal text i_delim.
  --
  -- Parameters:
  --   i_str   - string to split
  --   i_delim - delimiter text (one or more characters), matched literally
  --             with INSTR; defaults to ','
  -- Returns:
  --   VARCHAR2_TABLE holding the pieces in order of appearance. An empty/NULL
  --   i_str gives an empty collection. A string without the delimiter gives a
  --   single element containing the whole string. Adjacent or trailing
  --   delimiters produce NULL elements.
  l_tokens     VARCHAR2_TABLE       := VARCHAR2_TABLE();
  l_str_len    CONSTANT PLS_INTEGER := LENGTH( i_str );
  l_delim_len  CONSTANT PLS_INTEGER := LENGTH( i_delim );
  l_from       PLS_INTEGER          := 1;   -- start of the current piece
  l_hit        PLS_INTEGER;                 -- position of the next delimiter
BEGIN
  -- Nothing to split: hand back the empty collection.
  IF l_str_len IS NULL OR l_str_len <= 0 THEN
    RETURN l_tokens;
  END IF;

  -- Emit one piece for every delimiter occurrence found.
  LOOP
    l_hit := INSTR( i_str, i_delim, l_from );
    -- INSTR gives 0 when no further delimiter exists and NULL when the
    -- delimiter itself is NULL; both end the scan.
    EXIT WHEN l_hit IS NULL OR l_hit <= 0;

    l_tokens.EXTEND;
    l_tokens( l_tokens.LAST ) := SUBSTR( i_str, l_from, l_hit - l_from );
    l_from := l_hit + l_delim_len;
  END LOOP;

  -- Remainder after the last delimiter (or the whole string if none was found).
  IF l_from <= l_str_len + 1 THEN
    l_tokens.EXTEND;
    l_tokens( l_tokens.LAST ) := SUBSTR( i_str, l_from, l_str_len - l_from + 1 );
  END IF;

  RETURN l_tokens;
END;
/
查询:
-- Example call: expand the collection returned by split_String into rows,
-- one row per piece of p_value.
WITH params ( p_value, p_delimiter ) AS (
  SELECT 'ab' || chr(10) || 'cd',
         'xxx'
  FROM   dual
)
SELECT t.COLUMN_VALUE AS CUT
FROM   params p
       CROSS JOIN TABLE( split_String( p.p_value, p.p_delimiter ) ) t;