使用正则表达式在逗号后提取值
Extract values after commas using regexp
我正在使用 Oracle 19c 数据库
下面是我的字符串值
-- Declare the SQL*Plus bind variable B1 and load it with the sample list of (id,'name') pairs.
variable B1 varchar2(60)
exec :B1:='(199,''TEST121''),(156,''TEST''),(1561,''TEST99'')';
我希望输出为
| ID | NAME |
| -------- | -------------- |
| 199| TEST121 |
| 156| TEST |
| 1561| TEST99 |
-- Asker's attempt (kept as posted). The name pattern '[A-Z]+[0-9]' needs uppercase letters followed by
-- exactly one digit, so a name without a digit (TEST) is never matched and TEST121 matches only as TEST1.
-- The CONNECT BY condition looks for the LEVEL-th single digit '[0-9]', so recursion continues once per
-- digit in :b1 rather than once per (id,'name') pair.
select regexp_substr(regexp_substr(:b1,'[^A-Z+0-9][0-9]+', 1,level),'[0-9]+') as id , regexp_substr(:b1,'[A-Z]+[0-9]', 1,level) as name from dual connect by regexp_substr(:b1,'[0-9]', 1,level) is not null;
此查询仅给出以数字结尾的字符串值的输出。
这是一种选择:
SQL> WITH
2 test (col)
3 AS
4 (SELECT '(199,''TEST121''),(156,''TEST''),(1561,''TEST99'')' FROM DUAL)
5 SELECT SUBSTR (str, 1, INSTR (str, ',') - 1) id,
6 SUBSTR (str, INSTR (str, ',') + 1) name
7 FROM ( SELECT REGEXP_SUBSTR (
8 REPLACE (
9 REPLACE (
10 REPLACE (REPLACE (col, '),(', '#'), CHR (39), ''),
11 '(',
12 ''),
13 ')',
14 ''),
15 '[^#]+',
16 1,
17 LEVEL) str
18 FROM test
19 CONNECT BY LEVEL <= REGEXP_COUNT (REPLACE (col, '),(', '#'), '#') + 1);
ID NAME
-------------------------------- --------------------------------
199 TEST121
156 TEST
1561 TEST99
SQL>
它有什么作用?
- 第 1 - 4 行 - 示例数据
- 第 8 - 14 行 - 将
),(
替换为 #
(以获得更简单的分隔符);删除单引号以及前导和尾随括号
- 第 7 - 19 行 - 将示例字符串拆分成行
- 第 5、6 行 - 从每行中提取
ID
和 NAME
@Littlefoot给出的选项的详细解释
下面是将多值字符串转换为列和行的测试用例。
-- SQL*Plus display settings: wide lines/pages, and ID / NAME shown as 20-character text columns.
set lines 999 pages 999
col ID for a20
col NAME for a20
-- First test value: (id,'name') pairs.
variable B1 varchar2(60)
exec :B1:='(99,''TABLE1''),(56,''INDEX1''),(199,''TABLE''),(156,''INDEX'')';
-- Second test value: B1 is re-declared and reassigned, so it replaces the value set just above.
-- Here the quoted item comes first in each pair and the values mix letters, digits and '~'.
variable B1 varchar2(100)
exec :B1:='(''TABL234E1~'',99),(''I1NDEX1~'',5ABC6),(''TAB1LE'',4ABC0),(''IND11EX'',6ACDE0)';
-- Turn the list held in :b1 into one row per pair, with the text before the
-- first comma as ID and the text after it as NAME.
WITH test (col) AS (
    SELECT :b1 FROM dual
),
-- One row per pair: '),(' becomes '#', single quotes and the remaining
-- parentheses are removed, then the LEVEL-th '#'-delimited chunk is taken.
pairs AS (
    SELECT REGEXP_SUBSTR(
               REPLACE(REPLACE(REPLACE(REPLACE(col, '),(', '#'), CHR(39), ''), '(', ''), ')', ''),
               '[^#]+',
               1,
               LEVEL
           ) AS str
    FROM test
    -- number of rows = number of '#' separators + 1
    CONNECT BY LEVEL <= REGEXP_COUNT(REPLACE(col, '),(', '#'), '#') + 1
)
SELECT SUBSTR(str, 1, INSTR(str, ',') - 1) AS id,
       SUBSTR(str, INSTR(str, ',') + 1)    AS name
FROM pairs;
说明:
-- Value used for the step-by-step walkthrough: four (id,'name') pairs in bind variable B1.
variable B1 varchar2(60)
exec :B1:='(99,''TABLE1''),(56,''INDEX1''),(199,''TABLE''),(156,''INDEX'')';
第一步
-- Return the raw text of :b1 as a single row.
WITH test (col) AS (
    SELECT :b1 FROM dual
)
SELECT col
FROM test;
此步骤把绑定变量 :b1 中的原始字符串作为一行查询结果返回。
结果:
(99,'TABLE1'),(56,'INDEX1'),(199,'TABLE'),(156,'INDEX')
第 2 步
-- Collapse every '),(' pair boundary into a single '#'.
WITH test (col) AS (
    SELECT :b1 FROM dual
)
SELECT REPLACE(col, '),(', '#')
FROM test;
此步骤有助于将 '),(' 替换为 '#'(以获得更简单的多值字符串分隔符)。
结果:
(99,'TABLE1'#56,'INDEX1'#199,'TABLE'#156,'INDEX')
第 3 步
-- Replace '),(' with '#', then strip every single quote (CHR(39)).
WITH test (col) AS (
    SELECT :b1 FROM dual
)
SELECT REPLACE(
           REPLACE(col, '),(', '#'),
           CHR(39),
           ''
       )
FROM test;
此步骤有助于替换上一步中的单引号。
结果:
(99,TABLE1#56,INDEX1#199,TABLE#156,INDEX)
第四步
-- Replace '),(' with '#', strip single quotes, then remove every '('.
WITH test (col) AS (
    SELECT :b1 FROM dual
)
SELECT REPLACE(
           REPLACE(
               REPLACE(col, '),(', '#'),
               CHR(39),
               ''
           ),
           '(',
           ''
       )
FROM test;
此步骤有助于将“(”替换为“”(以删除上一步中的前导括号)。
结果:
99,TABLE1#56,INDEX1#199,TABLE#156,INDEX)
第 5 步
-- Replace '),(' with '#', strip single quotes, then remove every '(' and every ')'.
WITH test (col) AS (
    SELECT :b1 FROM dual
)
SELECT REPLACE(
           REPLACE(
               REPLACE(
                   REPLACE(col, '),(', '#'),
                   CHR(39),
                   ''
               ),
               '(',
               ''
           ),
           ')',
           ''
       )
FROM test;
此步骤有助于将“)”替换为“”(以删除上一步中的尾随括号)。
结果:
99,TABLE1#56,INDEX1#199,TABLE#156,INDEX
第 6 步
-- Split the cleaned-up string into one row per '#'-delimited chunk.
WITH test (col) AS (
    SELECT :b1 FROM dual
),
chunks AS (
    SELECT REGEXP_SUBSTR(
               REPLACE(REPLACE(REPLACE(REPLACE(col, '),(', '#'), CHR(39), ''), '(', ''), ')', ''),
               '[^#]+',
               1,
               LEVEL          -- LEVEL-th chunk for the LEVEL-th generated row
           ) AS str
    FROM test
    -- number of rows = number of '#' separators + 1
    CONNECT BY LEVEL <= REGEXP_COUNT(REPLACE(col, '),(', '#'), '#') + 1
)
SELECT str
FROM chunks;
这一步有两个组成部分:
a. REGEXP_SUBSTR(<value from previous step>,'[^#]+',1,LEVEL)
'[^#]+' ==> matches a run of one or more consecutive characters other than '#', i.e. one '#'-delimited chunk
1 ==> the position in the string at which the search starts
LEVEL==> LEVEL can be used in conjunction with CONNECT BY LEVEL clause. All the chunks of the source string can be displayed by using the LEVEL keyword as the match occurrence.
b. CONNECT BY LEVEL <= REGEXP_COUNT (REPLACE (col, '),(', '#'), '#') + 1
REPLACE (col, '),(', '#') ==> This helps in replacing '),(' with '#' (to get a simpler separator for multivalued string).
REGEXP_COUNT(<output from previous step>,'#')+1 ==> Counts the '#' separators in the string and adds 1, which gives the number of chunks.
Here, the CONNECT BY LEVEL clause generates that many rows: one row per chunk of the source string.
结果:
99,TABLE1
56,INDEX1
199,TABLE
156,INDEX
第 7 步
-- Final step: split each chunk at its first comma into ID and NAME.
WITH test (col) AS (
    SELECT :b1 FROM dual
),
-- One row per pair: '),(' becomes '#', single quotes and the remaining
-- parentheses are removed, then the LEVEL-th '#'-delimited chunk is taken.
pairs AS (
    SELECT REGEXP_SUBSTR(
               REPLACE(REPLACE(REPLACE(REPLACE(col, '),(', '#'), CHR(39), ''), '(', ''), ')', ''),
               '[^#]+',
               1,
               LEVEL
           ) AS str
    FROM test
    CONNECT BY LEVEL <= REGEXP_COUNT(REPLACE(col, '),(', '#'), '#') + 1
)
SELECT SUBSTR(str, 1, INSTR(str, ',') - 1) AS id,    -- text before the first comma
       SUBSTR(str, INSTR(str, ',') + 1)    AS name   -- text after the first comma
FROM pairs;
这一步有两个组成部分:
a. SUBSTR (str, 1, INSTR (str, ',') - 1) ==> str 取自上一步的输出;从第 1 个字符开始截取,长度为第一个 ',' 的位置减 1,即取出 ',' 之前的全部字符,得到 ID。
b. SUBSTR (str, INSTR (str, ',') + 1) name ==> str 同样取自上一步的输出;从第一个 ',' 的后一个位置开始,一直截取到字符串末尾,得到 NAME。
结果:
ID NAME
-------------------- --------------------
99 TABLE1
56 INDEX1
199 TABLE
156 INDEX
我正在使用 Oracle 19c 数据库
下面是我的字符串值
-- Declare the SQL*Plus bind variable B1 and load it with the sample list of (id,'name') pairs.
variable B1 varchar2(60)
exec :B1:='(199,''TEST121''),(156,''TEST''),(1561,''TEST99'')';
我希望输出为
| ID | NAME |
| -------- | -------------- |
| 199| TEST121 |
| 156| TEST |
| 1561| TEST99 |
-- Asker's attempt (kept as posted). The name pattern '[A-Z]+[0-9]' needs uppercase letters followed by
-- exactly one digit, so a name without a digit (TEST) is never matched and TEST121 matches only as TEST1.
-- The CONNECT BY condition looks for the LEVEL-th single digit '[0-9]', so recursion continues once per
-- digit in :b1 rather than once per (id,'name') pair.
select regexp_substr(regexp_substr(:b1,'[^A-Z+0-9][0-9]+', 1,level),'[0-9]+') as id , regexp_substr(:b1,'[A-Z]+[0-9]', 1,level) as name from dual connect by regexp_substr(:b1,'[0-9]', 1,level) is not null;
此查询仅给出以数字结尾的字符串值的输出。
这是一种选择:
SQL> WITH
2 test (col)
3 AS
4 (SELECT '(199,''TEST121''),(156,''TEST''),(1561,''TEST99'')' FROM DUAL)
5 SELECT SUBSTR (str, 1, INSTR (str, ',') - 1) id,
6 SUBSTR (str, INSTR (str, ',') + 1) name
7 FROM ( SELECT REGEXP_SUBSTR (
8 REPLACE (
9 REPLACE (
10 REPLACE (REPLACE (col, '),(', '#'), CHR (39), ''),
11 '(',
12 ''),
13 ')',
14 ''),
15 '[^#]+',
16 1,
17 LEVEL) str
18 FROM test
19 CONNECT BY LEVEL <= REGEXP_COUNT (REPLACE (col, '),(', '#'), '#') + 1);
ID NAME
-------------------------------- --------------------------------
199 TEST121
156 TEST
1561 TEST99
SQL>
它有什么作用?
- 第 1 - 4 行 - 示例数据
- 第 8 - 14 行 - 将
),(
替换为#
(以获得更简单的分隔符);删除单引号以及前导和尾随括号
- 第 7 - 19 行 - 将示例字符串拆分成行
- 第 5、6 行 - 从每行中提取
ID
和NAME
@Littlefoot给出的选项的详细解释
下面是将多值字符串转换为列和行的测试用例。
-- SQL*Plus display settings: wide lines/pages, and ID / NAME shown as 20-character text columns.
set lines 999 pages 999
col ID for a20
col NAME for a20
-- First test value: (id,'name') pairs.
variable B1 varchar2(60)
exec :B1:='(99,''TABLE1''),(56,''INDEX1''),(199,''TABLE''),(156,''INDEX'')';
-- Second test value: B1 is re-declared and reassigned, so it replaces the value set just above.
-- Here the quoted item comes first in each pair and the values mix letters, digits and '~'.
variable B1 varchar2(100)
exec :B1:='(''TABL234E1~'',99),(''I1NDEX1~'',5ABC6),(''TAB1LE'',4ABC0),(''IND11EX'',6ACDE0)';
-- Turn the list held in :b1 into one row per pair, with the text before the
-- first comma as ID and the text after it as NAME.
WITH test (col) AS (
    SELECT :b1 FROM dual
),
-- One row per pair: '),(' becomes '#', single quotes and the remaining
-- parentheses are removed, then the LEVEL-th '#'-delimited chunk is taken.
pairs AS (
    SELECT REGEXP_SUBSTR(
               REPLACE(REPLACE(REPLACE(REPLACE(col, '),(', '#'), CHR(39), ''), '(', ''), ')', ''),
               '[^#]+',
               1,
               LEVEL
           ) AS str
    FROM test
    -- number of rows = number of '#' separators + 1
    CONNECT BY LEVEL <= REGEXP_COUNT(REPLACE(col, '),(', '#'), '#') + 1
)
SELECT SUBSTR(str, 1, INSTR(str, ',') - 1) AS id,
       SUBSTR(str, INSTR(str, ',') + 1)    AS name
FROM pairs;
说明:
-- Value used for the step-by-step walkthrough: four (id,'name') pairs in bind variable B1.
variable B1 varchar2(60)
exec :B1:='(99,''TABLE1''),(56,''INDEX1''),(199,''TABLE''),(156,''INDEX'')';
第一步
-- Return the raw text of :b1 as a single row.
WITH test (col) AS (
    SELECT :b1 FROM dual
)
SELECT col
FROM test;
此步骤把绑定变量 :b1 中的原始字符串作为一行查询结果返回。
结果:
(99,'TABLE1'),(56,'INDEX1'),(199,'TABLE'),(156,'INDEX')
第 2 步
-- Collapse every '),(' pair boundary into a single '#'.
WITH test (col) AS (
    SELECT :b1 FROM dual
)
SELECT REPLACE(col, '),(', '#')
FROM test;
此步骤有助于将 '),(' 替换为 '#'(以获得更简单的多值字符串分隔符)。
结果:
(99,'TABLE1'#56,'INDEX1'#199,'TABLE'#156,'INDEX')
第 3 步
-- Replace '),(' with '#', then strip every single quote (CHR(39)).
WITH test (col) AS (
    SELECT :b1 FROM dual
)
SELECT REPLACE(
           REPLACE(col, '),(', '#'),
           CHR(39),
           ''
       )
FROM test;
此步骤有助于替换上一步中的单引号。
结果:
(99,TABLE1#56,INDEX1#199,TABLE#156,INDEX)
第四步
-- Replace '),(' with '#', strip single quotes, then remove every '('.
WITH test (col) AS (
    SELECT :b1 FROM dual
)
SELECT REPLACE(
           REPLACE(
               REPLACE(col, '),(', '#'),
               CHR(39),
               ''
           ),
           '(',
           ''
       )
FROM test;
此步骤有助于将“(”替换为“”(以删除上一步中的前导括号)。
结果:
99,TABLE1#56,INDEX1#199,TABLE#156,INDEX)
第 5 步
-- Replace '),(' with '#', strip single quotes, then remove every '(' and every ')'.
WITH test (col) AS (
    SELECT :b1 FROM dual
)
SELECT REPLACE(
           REPLACE(
               REPLACE(
                   REPLACE(col, '),(', '#'),
                   CHR(39),
                   ''
               ),
               '(',
               ''
           ),
           ')',
           ''
       )
FROM test;
此步骤有助于将“)”替换为“”(以删除上一步中的尾随括号)。
结果:
99,TABLE1#56,INDEX1#199,TABLE#156,INDEX
第 6 步
-- Split the cleaned-up string into one row per '#'-delimited chunk.
WITH test (col) AS (
    SELECT :b1 FROM dual
),
chunks AS (
    SELECT REGEXP_SUBSTR(
               REPLACE(REPLACE(REPLACE(REPLACE(col, '),(', '#'), CHR(39), ''), '(', ''), ')', ''),
               '[^#]+',
               1,
               LEVEL          -- LEVEL-th chunk for the LEVEL-th generated row
           ) AS str
    FROM test
    -- number of rows = number of '#' separators + 1
    CONNECT BY LEVEL <= REGEXP_COUNT(REPLACE(col, '),(', '#'), '#') + 1
)
SELECT str
FROM chunks;
这一步有两个组成部分:
a. REGEXP_SUBSTR(<value from previous step>,'[^#]+',1,LEVEL)
'[^#]+' ==> matches a run of one or more consecutive characters other than '#', i.e. one '#'-delimited chunk
1 ==> the position in the string at which the search starts
LEVEL==> LEVEL can be used in conjunction with CONNECT BY LEVEL clause. All the chunks of the source string can be displayed by using the LEVEL keyword as the match occurrence.
b. CONNECT BY LEVEL <= REGEXP_COUNT (REPLACE (col, '),(', '#'), '#') + 1
REPLACE (col, '),(', '#') ==> This helps in replacing '),(' with '#' (to get a simpler separator for multivalued string).
REGEXP_COUNT(<output from previous step>,'#')+1 ==> Counts the '#' separators in the string and adds 1, which gives the number of chunks.
Here, the CONNECT BY LEVEL clause generates that many rows: one row per chunk of the source string.
结果:
99,TABLE1
56,INDEX1
199,TABLE
156,INDEX
第 7 步
-- Final step: split each chunk at its first comma into ID and NAME.
WITH test (col) AS (
    SELECT :b1 FROM dual
),
-- One row per pair: '),(' becomes '#', single quotes and the remaining
-- parentheses are removed, then the LEVEL-th '#'-delimited chunk is taken.
pairs AS (
    SELECT REGEXP_SUBSTR(
               REPLACE(REPLACE(REPLACE(REPLACE(col, '),(', '#'), CHR(39), ''), '(', ''), ')', ''),
               '[^#]+',
               1,
               LEVEL
           ) AS str
    FROM test
    CONNECT BY LEVEL <= REGEXP_COUNT(REPLACE(col, '),(', '#'), '#') + 1
)
SELECT SUBSTR(str, 1, INSTR(str, ',') - 1) AS id,    -- text before the first comma
       SUBSTR(str, INSTR(str, ',') + 1)    AS name   -- text after the first comma
FROM pairs;
这一步有两个组成部分:
a. SUBSTR (str, 1, INSTR (str, ',') - 1) ==> str 取自上一步的输出;从第 1 个字符开始截取,长度为第一个 ',' 的位置减 1,即取出 ',' 之前的全部字符,得到 ID。
b. SUBSTR (str, INSTR (str, ',') + 1) name ==> str 同样取自上一步的输出;从第一个 ',' 的后一个位置开始,一直截取到字符串末尾,得到 NAME。
结果:
ID NAME
-------------------- --------------------
99 TABLE1
56 INDEX1
199 TABLE
156 INDEX