Oracle 正则表达式拆分字符串,但不拆分方括号内的内容
oracle regular expression split but workaround brackets
我有一个像这样的字符串:“Att1:Val1,[Txt1,Txt2:Txt3]:Val2”
使用 Oracle Sql,我想实现如下拆分成行和列:
lvl | substr2 | substr3 |
---|---|---|
1 | Att1 | Val1 |
2 | [Txt1,Txt2:Txt3] | Val2 |
我试过下面的代码,但无法弄清楚如何让方括号内的值不被逗号和冒号拆分:
-- Asker's attempt: split WCLN into elements on "," and then split each
-- element on ":" with XQuery inside XMLTABLE. Neither tokenize call knows
-- about the square brackets, so the bracketed element is split as well.
with WTBL as
(
-- Sample input; note the trailing comma after Val2.
select 'Att1:Val1,[Txt1,Txt2:Txt3]:Val2,' as WCLN
from dual
)
select lvl, substr1, substr2, substr3, WCLN
from WTBL
-- Row-generating XQuery: when the input contains a comma, tokenize it on ","
-- and return every token except the last one (presumably the empty token
-- left by the trailing comma); otherwise return the input unchanged.
cross join xmltable('if (contains($PRM,","))
then
let $list := ora:tokenize($PRM, ","),
$cnt := count($list)
for $val at $r in $list
where $r < $cnt
return $val
else $PRM'
passing WCLN as PRM
-- substr1: the whole comma-separated element.
columns substr1 varchar2(4000) path '.'
-- substr2: the second-to-last ":"-separated token of the element, or the
-- element itself when it contains no ":".
,substr2 varchar2(4000) path 'if (contains( . , ":")) then
let $list := ora:tokenize( . ,":"),
$cnt := count($list)
for $val at $r in $list
where $r = $cnt - 1
return $val
else . '
-- substr3: the last ":"-separated token of the element, or the element
-- itself when it contains no ":".
,substr3 varchar2(4000) path 'if (contains( . , ":")) then
let $list := ora:tokenize( . ,":"),
$cnt := count($list)
for $val at $r in $list
where $r = $cnt
return $val
else . '
-- lvl: sequence number of the generated row.
,lvl FOR ORDINALITY
) xm
非常感谢您的帮助!
弗拉德
您可以使用正则表达式 (\[.*?\]|.*?):(.*?)(,|$)
和递归 sub-query:
-- Walk each string one "key:value" pair at a time with a recursive CTE.
-- The pattern captures the key (a whole [...] group, or the shortest text
-- before ':') in group 1 and the value (up to the next ',' or the end of the
-- string) in group 2. next_pos is the position just past the matched pair
-- (REGEXP_INSTR with return option 1), or 0 when no further pair matches.
WITH pair_scan (value, lvl, substr1, substr2, next_pos) AS (
    -- Anchor member: first pair, searched from position 1.
    SELECT
        t.value,
        1,
        REGEXP_SUBSTR(t.value, '(\[.*?\]|.*?):(.*?)(,|$)', 1, 1, NULL, 1),
        REGEXP_SUBSTR(t.value, '(\[.*?\]|.*?):(.*?)(,|$)', 1, 1, NULL, 2),
        REGEXP_INSTR(t.value, '(\[.*?\]|.*?):(.*?)(,|$)', 1, 1, 1)
    FROM table_name t
    UNION ALL
    -- Recursive member: next pair, searched from where the previous one ended.
    SELECT
        p.value,
        p.lvl + 1,
        REGEXP_SUBSTR(p.value, '(\[.*?\]|.*?):(.*?)(,|$)', p.next_pos, 1, NULL, 1),
        REGEXP_SUBSTR(p.value, '(\[.*?\]|.*?):(.*?)(,|$)', p.next_pos, 1, NULL, 2),
        REGEXP_INSTR(p.value, '(\[.*?\]|.*?):(.*?)(,|$)', p.next_pos, 1, 1)
    FROM pair_scan p
    WHERE p.next_pos > 0
)
-- The last recursion step finds no pair (next_pos = 0); drop that row.
SELECT
    s.lvl,
    s.substr1,
    s.substr2
FROM pair_scan s
WHERE s.next_pos > 0;
或者,使用简单(且更快)的字符串函数:
-- Split each string into "key:value" pairs using only INSTR/SUBSTR.
-- For every pair the recursive CTE tracks three positions:
--   spos: first character of the pair,
--   mpos: the ':' separating key from value (the ':' right after ']' when
--         the key starts with '['); NULL or 0 when no separator is found,
--   epos: the ',' ending the pair, or 0 when the pair runs to end of string.
-- Changes from the original:
--   * NULLIF(..., 0) on the bracket path: a '['-prefixed element with no
--     "]:" used to give mpos = 1, which reset epos to the first comma and
--     made the recursion repeat the same position without end.
--   * Final WHERE mpos > 0: an element without any ':' (for example the empty
--     element after a trailing comma) used to produce a spurious row whose
--     substr2 was the entire input string.
WITH matches (value, lvl, spos, mpos, epos) AS (
    -- Anchor member: first pair starts at position 1.
    SELECT value,
           1,
           1,
           CASE
               WHEN SUBSTR(value, 1, 1) = '['
               THEN NULLIF(INSTR(value, ']:', 1), 0) + 1
               ELSE INSTR(value, ':', 1)
           END,
           INSTR(
               value,
               ',',
               CASE
                   WHEN SUBSTR(value, 1, 1) = '['
                   THEN NULLIF(INSTR(value, ']:', 1), 0) + 1
                   ELSE INSTR(value, ':', 1)
               END
           )
    FROM table_name
    UNION ALL
    -- Recursive member: next pair starts right after the previous comma.
    SELECT value,
           lvl + 1,
           epos + 1,
           CASE
               WHEN SUBSTR(value, epos + 1, 1) = '['
               THEN NULLIF(INSTR(value, ']:', epos + 1), 0) + 1
               ELSE INSTR(value, ':', epos + 1)
           END,
           INSTR(
               value,
               ',',
               CASE
                   WHEN SUBSTR(value, epos + 1, 1) = '['
                   THEN NULLIF(INSTR(value, ']:', epos + 1), 0) + 1
                   ELSE INSTR(value, ':', epos + 1)
               END
           )
    FROM matches
    WHERE epos > 0
)
SELECT lvl,
       SUBSTR(value, spos, mpos - spos) AS substr1,
       CASE epos
           WHEN 0
           THEN SUBSTR(value, mpos + 1)
           ELSE SUBSTR(value, mpos + 1, epos - mpos - 1)
       END AS substr2
FROM matches
-- Keep only rows where a key/value separator was actually found.
WHERE mpos > 0;
其中,对于示例数据:
-- Sample data: one row holding the example string from the question
-- (without a trailing comma).
CREATE TABLE table_name (value) AS
    SELECT 'Att1:Val1,[Txt1,Txt2:Txt3]:Val2'
    FROM dual;
两种方法都输出:
LVL | SUBSTR1 | SUBSTR2 |
---|---|---|
1 | Att1 | Val1 |
2 | [Txt1,Txt2:Txt3] | Val2 |
db<>fiddle here
我有一个像这样的字符串:“Att1:Val1,[Txt1,Txt2:Txt3]:Val2”
使用 Oracle Sql,我想实现如下拆分成行和列:
lvl | substr2 | substr3 |
---|---|---|
1 | Att1 | Val1 |
2 | [Txt1,Txt2:Txt3] | Val2 |
我试过下面的代码,但无法弄清楚如何让方括号内的值不被逗号和冒号拆分:
-- Asker's attempt: split WCLN into elements on "," and then split each
-- element on ":" with XQuery inside XMLTABLE. Neither tokenize call knows
-- about the square brackets, so the bracketed element is split as well.
with WTBL as
(
-- Sample input; note the trailing comma after Val2.
select 'Att1:Val1,[Txt1,Txt2:Txt3]:Val2,' as WCLN
from dual
)
select lvl, substr1, substr2, substr3, WCLN
from WTBL
-- Row-generating XQuery: when the input contains a comma, tokenize it on ","
-- and return every token except the last one (presumably the empty token
-- left by the trailing comma); otherwise return the input unchanged.
cross join xmltable('if (contains($PRM,","))
then
let $list := ora:tokenize($PRM, ","),
$cnt := count($list)
for $val at $r in $list
where $r < $cnt
return $val
else $PRM'
passing WCLN as PRM
-- substr1: the whole comma-separated element.
columns substr1 varchar2(4000) path '.'
-- substr2: the second-to-last ":"-separated token of the element, or the
-- element itself when it contains no ":".
,substr2 varchar2(4000) path 'if (contains( . , ":")) then
let $list := ora:tokenize( . ,":"),
$cnt := count($list)
for $val at $r in $list
where $r = $cnt - 1
return $val
else . '
-- substr3: the last ":"-separated token of the element, or the element
-- itself when it contains no ":".
,substr3 varchar2(4000) path 'if (contains( . , ":")) then
let $list := ora:tokenize( . ,":"),
$cnt := count($list)
for $val at $r in $list
where $r = $cnt
return $val
else . '
-- lvl: sequence number of the generated row.
,lvl FOR ORDINALITY
) xm
非常感谢您的帮助!
弗拉德
您可以使用正则表达式 (\[.*?\]|.*?):(.*?)(,|$)
和递归 sub-query:
-- Walk each string one "key:value" pair at a time with a recursive CTE.
-- The pattern captures the key (a whole [...] group, or the shortest text
-- before ':') in group 1 and the value (up to the next ',' or the end of the
-- string) in group 2. next_pos is the position just past the matched pair
-- (REGEXP_INSTR with return option 1), or 0 when no further pair matches.
WITH pair_scan (value, lvl, substr1, substr2, next_pos) AS (
    -- Anchor member: first pair, searched from position 1.
    SELECT
        t.value,
        1,
        REGEXP_SUBSTR(t.value, '(\[.*?\]|.*?):(.*?)(,|$)', 1, 1, NULL, 1),
        REGEXP_SUBSTR(t.value, '(\[.*?\]|.*?):(.*?)(,|$)', 1, 1, NULL, 2),
        REGEXP_INSTR(t.value, '(\[.*?\]|.*?):(.*?)(,|$)', 1, 1, 1)
    FROM table_name t
    UNION ALL
    -- Recursive member: next pair, searched from where the previous one ended.
    SELECT
        p.value,
        p.lvl + 1,
        REGEXP_SUBSTR(p.value, '(\[.*?\]|.*?):(.*?)(,|$)', p.next_pos, 1, NULL, 1),
        REGEXP_SUBSTR(p.value, '(\[.*?\]|.*?):(.*?)(,|$)', p.next_pos, 1, NULL, 2),
        REGEXP_INSTR(p.value, '(\[.*?\]|.*?):(.*?)(,|$)', p.next_pos, 1, 1)
    FROM pair_scan p
    WHERE p.next_pos > 0
)
-- The last recursion step finds no pair (next_pos = 0); drop that row.
SELECT
    s.lvl,
    s.substr1,
    s.substr2
FROM pair_scan s
WHERE s.next_pos > 0;
或者,使用简单(且更快)的字符串函数:
-- Split each string into "key:value" pairs using only INSTR/SUBSTR.
-- For every pair the recursive CTE tracks three positions:
--   spos: first character of the pair,
--   mpos: the ':' separating key from value (the ':' right after ']' when
--         the key starts with '['); NULL or 0 when no separator is found,
--   epos: the ',' ending the pair, or 0 when the pair runs to end of string.
-- Changes from the original:
--   * NULLIF(..., 0) on the bracket path: a '['-prefixed element with no
--     "]:" used to give mpos = 1, which reset epos to the first comma and
--     made the recursion repeat the same position without end.
--   * Final WHERE mpos > 0: an element without any ':' (for example the empty
--     element after a trailing comma) used to produce a spurious row whose
--     substr2 was the entire input string.
WITH matches (value, lvl, spos, mpos, epos) AS (
    -- Anchor member: first pair starts at position 1.
    SELECT value,
           1,
           1,
           CASE
               WHEN SUBSTR(value, 1, 1) = '['
               THEN NULLIF(INSTR(value, ']:', 1), 0) + 1
               ELSE INSTR(value, ':', 1)
           END,
           INSTR(
               value,
               ',',
               CASE
                   WHEN SUBSTR(value, 1, 1) = '['
                   THEN NULLIF(INSTR(value, ']:', 1), 0) + 1
                   ELSE INSTR(value, ':', 1)
               END
           )
    FROM table_name
    UNION ALL
    -- Recursive member: next pair starts right after the previous comma.
    SELECT value,
           lvl + 1,
           epos + 1,
           CASE
               WHEN SUBSTR(value, epos + 1, 1) = '['
               THEN NULLIF(INSTR(value, ']:', epos + 1), 0) + 1
               ELSE INSTR(value, ':', epos + 1)
           END,
           INSTR(
               value,
               ',',
               CASE
                   WHEN SUBSTR(value, epos + 1, 1) = '['
                   THEN NULLIF(INSTR(value, ']:', epos + 1), 0) + 1
                   ELSE INSTR(value, ':', epos + 1)
               END
           )
    FROM matches
    WHERE epos > 0
)
SELECT lvl,
       SUBSTR(value, spos, mpos - spos) AS substr1,
       CASE epos
           WHEN 0
           THEN SUBSTR(value, mpos + 1)
           ELSE SUBSTR(value, mpos + 1, epos - mpos - 1)
       END AS substr2
FROM matches
-- Keep only rows where a key/value separator was actually found.
WHERE mpos > 0;
其中,对于示例数据:
-- Sample data: one row holding the example string from the question
-- (without a trailing comma).
CREATE TABLE table_name (value) AS
    SELECT 'Att1:Val1,[Txt1,Txt2:Txt3]:Val2'
    FROM dual;
两种方法都输出:
LVL | SUBSTR1 | SUBSTR2 |
---|---|---|
1 | Att1 | Val1 |
2 | [Txt1,Txt2:Txt3] | Val2 |
db<>fiddle here