Oracle中出现单个逗号时拆分字符串,出现多个逗号时不拆分
Split a string on single commas, but not on runs of multiple commas, in Oracle
-- Case #1: short sample mixing single commas with runs of several commas
SELECT '1,2,,2,3,4,,,4,5, , ,, , ,, , ,,,,,,5,6'
FROM DUAL;
-- Case #2: long free-text sample, written as a q'[...]' quoted literal
SELECT q'[1aaaaaa, bbb, bbbb (cccc,,ccccc) 20,,,20-09-30 11:11:11 (dddddddd): below eeeeeee is ddddd of teh dddd. ajhvajshbfjasbhfjkabsdfkjabsfkljbaaksjfka . 569716476@@asdasa asdasdi (asfasfasf) 2020-09-30 22:22:27 (sdfsdfal): As per sdfsdfsdfg sdfm: sds the below is a sdfsdf@@@@3**** sd sd sdd sdffve sdffin with sdf ��� the below is correct. the "sdfd" sds tsd sdfe fsI as sdfL. hsdfe we do not sdffte fsw dsddDs123**** just new sdfsdng ssdfsds.@@sdfsda sdfsdni (sdffdsdUP) 2020-09-23 22:31:54 (sdffsdal): sdil zxct cx Gzxcz zxcc: zxcc/zxI-Jcxz- cx zxccx***zxccc rzxcczxc***. 569zxc476@@zxccxi zxcxzze (zxcczxzxc) 2020-02-28 22:21:26, (zxtezxcxz) 03/28-As per zxcc, ,, ,,, , ,,,,,, ,zxzxx by cx cxcx zxzz is zxcxzz zxcxz zxcc. zxc zx]'
FROM DUAL;
--案例#3
Please copy and paste the string from Case #2 several times, so that the overall length exceeds 4000 characters, and then try your solution.
这个字符串应该在单个逗号出现时拆分,并且不应拆分多次出现的逗号,如 double/triple 等。
此外,如果任何人都可以用 12345 之类的字符串替换上面的多个逗号,那么它将对我有用。不要替换单个逗号
请注意,上面的字符串来自 CLOB 列,因此需要一个高效的查询,它应该花费更少 cpu。
o/p:
1
2,,2
3
4,,,4
5, , ,, , ,, , ,,,,,,5 -- NOTE: this has multiple commas with space
6
从 Oracle 11gR2 开始,您可以使用:
-- Walk each value left to right, taking one regex match per recursion step.
-- The pattern matches a run of digits, optionally preceded by digit runs that
-- are joined by two or more commas (each comma may be followed by spaces),
-- and then a closing comma or the end of the string.
WITH split_terms (value, next_pos, term) AS (
  -- Anchor member: first match, searching from position 1.
  SELECT
    t.value,
    -- return option 1: position of the character just after the match
    REGEXP_INSTR(t.value, '((\d+(, *){2,})*\d+)(,|$)', 1, 1, 1),
    -- subexpression 1: the matched term without its closing delimiter
    REGEXP_SUBSTR(t.value, '((\d+(, *){2,})*\d+)(,|$)', 1, 1, NULL, 1)
  FROM table_name t
  UNION ALL
  -- Recursive member: resume the search where the previous match ended.
  SELECT
    s.value,
    REGEXP_INSTR(s.value, '((\d+(, *){2,})*\d+)(,|$)', s.next_pos, 1, 1),
    REGEXP_SUBSTR(s.value, '((\d+(, *){2,})*\d+)(,|$)', s.next_pos, 1, NULL, 1)
  FROM split_terms s
  WHERE s.next_pos > 0
)
-- A position of 0 means the search found nothing, so that row carries no term.
SELECT st.term
FROM split_terms st
WHERE st.next_pos > 0;
其中,对于示例数据:
-- Sample table with a single column and one row: the comma-separated test string
CREATE TABLE table_name (value) AS
SELECT '1,2,,2,3,4,,,4,5, , ,, , ,, , ,,,,,,5,6'
FROM DUAL;
输出:
TERM
1
2,,2
3
4,,,4
5, , ,, , ,, , ,,,,,,5
6
可能适用于早期版本的替代方法是:
-- Alternative without a recursive CTE: for every row, generate the occurrence
-- numbers 1..N for which the pattern still matches, then pull out the N-th
-- match and strip its trailing comma delimiter.
SELECT
  RTRIM(
    REGEXP_SUBSTR(t.value, '((\d+(, *){2,})*\d+)(,|$)', 1, occ.COLUMN_VALUE),
    ','
  ) AS term
FROM table_name t
CROSS JOIN TABLE(
  CAST(
    MULTISET(
      -- LEVEL keeps counting up for as long as a LEVEL-th match exists in t.value
      SELECT LEVEL
      FROM DUAL
      CONNECT BY REGEXP_INSTR(t.value, '((\d+(, *){2,})*\d+)(,|$)', 1, LEVEL) > 0
    ) AS SYS.ODCINUMBERLIST
  )
) occ;
sqlfiddle here
-- Case #1: short sample mixing single commas with runs of several commas
SELECT '1,2,,2,3,4,,,4,5, , ,, , ,, , ,,,,,,5,6'
FROM DUAL;
-- Case #2: long free-text sample, written as a q'[...]' quoted literal
SELECT q'[1aaaaaa, bbb, bbbb (cccc,,ccccc) 20,,,20-09-30 11:11:11 (dddddddd): below eeeeeee is ddddd of teh dddd. ajhvajshbfjasbhfjkabsdfkjabsfkljbaaksjfka . 569716476@@asdasa asdasdi (asfasfasf) 2020-09-30 22:22:27 (sdfsdfal): As per sdfsdfsdfg sdfm: sds the below is a sdfsdf@@@@3**** sd sd sdd sdffve sdffin with sdf ��� the below is correct. the "sdfd" sds tsd sdfe fsI as sdfL. hsdfe we do not sdffte fsw dsddDs123**** just new sdfsdng ssdfsds.@@sdfsda sdfsdni (sdffdsdUP) 2020-09-23 22:31:54 (sdffsdal): sdil zxct cx Gzxcz zxcc: zxcc/zxI-Jcxz- cx zxccx***zxccc rzxcczxc***. 569zxc476@@zxccxi zxcxzze (zxcczxzxc) 2020-02-28 22:21:26, (zxtezxcxz) 03/28-As per zxcc, ,, ,,, , ,,,,,, ,zxzxx by cx cxcx zxzz is zxcxzz zxcxz zxcc. zxc zx]'
FROM DUAL;
--案例#3
Please copy and paste the string from Case #2 several times, so that the overall length exceeds 4000 characters, and then try your solution.
这个字符串应该在单个逗号出现时拆分,并且不应拆分多次出现的逗号,如 double/triple 等。
此外,如果任何人都可以用 12345 之类的字符串替换上面的多个逗号,那么它将对我有用。不要替换单个逗号
请注意,上面的字符串来自 CLOB 列,因此需要一个高效的查询,它应该花费更少 cpu。
o/p:
1
2,,2
3
4,,,4
5, , ,, , ,, , ,,,,,,5 -- NOTE: this has multiple commas with space
6
从 Oracle 11gR2 开始,您可以使用:
-- Walk each value left to right, taking one regex match per recursion step.
-- The pattern matches a run of digits, optionally preceded by digit runs that
-- are joined by two or more commas (each comma may be followed by spaces),
-- and then a closing comma or the end of the string.
WITH split_terms (value, next_pos, term) AS (
  -- Anchor member: first match, searching from position 1.
  SELECT
    t.value,
    -- return option 1: position of the character just after the match
    REGEXP_INSTR(t.value, '((\d+(, *){2,})*\d+)(,|$)', 1, 1, 1),
    -- subexpression 1: the matched term without its closing delimiter
    REGEXP_SUBSTR(t.value, '((\d+(, *){2,})*\d+)(,|$)', 1, 1, NULL, 1)
  FROM table_name t
  UNION ALL
  -- Recursive member: resume the search where the previous match ended.
  SELECT
    s.value,
    REGEXP_INSTR(s.value, '((\d+(, *){2,})*\d+)(,|$)', s.next_pos, 1, 1),
    REGEXP_SUBSTR(s.value, '((\d+(, *){2,})*\d+)(,|$)', s.next_pos, 1, NULL, 1)
  FROM split_terms s
  WHERE s.next_pos > 0
)
-- A position of 0 means the search found nothing, so that row carries no term.
SELECT st.term
FROM split_terms st
WHERE st.next_pos > 0;
其中,对于示例数据:
-- Sample table with a single column and one row: the comma-separated test string
CREATE TABLE table_name (value) AS
SELECT '1,2,,2,3,4,,,4,5, , ,, , ,, , ,,,,,,5,6'
FROM DUAL;
输出:
TERM
1
2,,2
3
4,,,4
5, , ,, , ,, , ,,,,,,5
6
可能适用于早期版本的替代方法是:
-- Alternative without a recursive CTE: for every row, generate the occurrence
-- numbers 1..N for which the pattern still matches, then pull out the N-th
-- match and strip its trailing comma delimiter.
SELECT
  RTRIM(
    REGEXP_SUBSTR(t.value, '((\d+(, *){2,})*\d+)(,|$)', 1, occ.COLUMN_VALUE),
    ','
  ) AS term
FROM table_name t
CROSS JOIN TABLE(
  CAST(
    MULTISET(
      -- LEVEL keeps counting up for as long as a LEVEL-th match exists in t.value
      SELECT LEVEL
      FROM DUAL
      CONNECT BY REGEXP_INSTR(t.value, '((\d+(, *){2,})*\d+)(,|$)', 1, LEVEL) > 0
    ) AS SYS.ODCINUMBERLIST
  )
) occ;
sqlfiddle here