如何使用REGEXP_SUBSTR解析数据?
How to parse data using REGEXP_SUBSTR?
我有一个这样的数据集(见下文),我想从中提取格式为 {variable_number_of_digits}{hyphen}{only_one_digit} 的数字(即:任意位数的数字 + 连字符 + 恰好一位数字):
-- Asker's original attempt, reproduced as posted. It does NOT return the
-- desired result: the bracket class followed by "+" accepts any run of its
-- characters, not exactly "<digits>-<one digit>", so e.g. the first row
-- yields 00000000 for NUM1 instead of NULL.
WITH mcte (addr) AS (
    SELECT 'ILLD/ELKJS/00000000/ELKJS/FHSH' FROM dual
    UNION ALL
    SELECT 'ILLD/EFECTE/0116988-7-002/ADFA/ADFG' FROM dual
    UNION ALL
    SELECT 'IIODK/1573230-0/2216755-7/' FROM dual
    UNION ALL
    SELECT 'IIODK/1573230-0/2216755-700/WRITE' FROM dual
)
SELECT
    addr,
    -- Capture group 2: the first run of class characters found after a slash.
    REGEXP_SUBSTR(addr, '(\/)([0-9-]+)', 1, 1, NULL, 2) AS num1,
    -- Capture group 3: a run of class characters following one more segment.
    REGEXP_SUBSTR(addr, '(\/)([^\/]+\/)([0-9\-]+)', 1, 1, NULL, 3) AS num2
FROM mcte;
我没有得到正确的结果集,应该如下所示
+-------------------------------------+-----------+-----------+
| ADDR | NUM1 | NUM2 |
+-------------------------------------+-----------+-----------+
| ILLD/ELKJS/00000000/ELKJS/FHSH | NULL | NULL |
| ILLD/EFECTE/0116988-7-002/ADFA/ADFG | NULL | NULL |
| IIODK/1573230-0/2216755-7/ | 1573230-0 | 2216755-7 |
| IIODK/1573230-0/2216755-700/WRITE | 1573230-0 | NULL |
+-------------------------------------+-----------+-----------+
如何实现?
I try to extract digits which are in form {variable_number_of_digits}{hyphen}{only_one_digit}
要匹配这种格式的数字,您应该这样做。
正则表达式: \/\d+-\d
如果您想从第二个和第三个以 / 分隔的片段中获取结果,则:
-- Position-based extraction: NUM1 comes from the 2nd slash-delimited segment
-- and NUM2 from the 3rd. A segment is returned only when it is exactly
-- "<digits>-<one digit>" and is immediately followed by a slash.
WITH mcte (addr) AS (
    SELECT 'ILLD/ELKJS/00000000/ELKJS/FHSH' FROM dual
    UNION ALL
    SELECT 'ILLD/EFECTE/0116988-7-002/ADFA/ADFG' FROM dual
    UNION ALL
    SELECT 'IIODK/1573230-0/2216755-7/' FROM dual
    UNION ALL
    SELECT 'IIODK/1573230-0/2216755-700/WRITE' FROM dual
    UNION ALL
    SELECT 'IIODK/TEST/1573230-0/2216755-700/WRITE' FROM dual
)
SELECT
    addr,
    -- Skip one leading segment from the start of the string, return group 1.
    REGEXP_SUBSTR(addr, '^[^/]*/(\d+-\d)/', 1, 1, NULL, 1) AS num1,
    -- Skip two leading segments from the start of the string, return group 1.
    REGEXP_SUBSTR(addr, '^[^/]*/[^/]*/(\d+-\d)/', 1, 1, NULL, 1) AS num2
FROM mcte;
输出:
ADDR NUM1 NUM2
-------------------------------------- ------------------- -------------------
ILLD/ELKJS/00000000/ELKJS/FHSH
ILLD/EFECTE/0116988-7-002/ADFA/ADFG
IIODK/1573230-0/2216755-7/ 1573230-0 2216755-7
IIODK/1573230-0/2216755-700/WRITE 1573230-0
IIODK/TEST/1573230-0/2216755-700/WRITE 1573230-0
更新:
如果您只想取出第一个和第二个符合该格式的片段,而不关心它们在字符串中的位置,那么:
-- Position-independent extraction: NUM1 is the first segment shaped
-- "<digits>-<one digit>" anywhere in the string, NUM2 is a later segment of
-- the same shape. A segment is delimited by the string start or a slash on
-- the left and by a slash or the string end on the right.
WITH mcte (addr) AS (
    SELECT 'ILLD/ELKJS/00000000/ELKJS/FHSH' FROM dual
    UNION ALL
    SELECT 'ILLD/EFECTE/0116988-7-002/ADFA/ADFG' FROM dual
    UNION ALL
    SELECT 'IIODK/1573230-0/2216755-7/' FROM dual
    UNION ALL
    SELECT 'IIODK/1573230-0/2216755-700/WRITE' FROM dual
    UNION ALL
    SELECT 'IIODK/TEST/1573230-0/2216755-700/WRITE' FROM dual
    UNION ALL
    SELECT '1234567-8' FROM dual
    UNION ALL
    SELECT '1234567-8/9876543-2' FROM dual
    UNION ALL
    SELECT '1234567-8/TEST/9876543-2' FROM dual
)
SELECT
    addr,
    -- Group 2 is the delimited number itself.
    REGEXP_SUBSTR(addr, '(^|/)(\d+-\d)(/|$)', 1, 1, NULL, 2) AS num1,
    -- A first number, optionally followed by intervening text (non-greedy),
    -- then group 3: another delimited number.
    REGEXP_SUBSTR(addr, '(^|/)\d+-\d(/.+?)?/(\d+-\d)(/|$)', 1, 1, NULL, 3) AS num2
FROM mcte;
输出:
ADDR NUM1 NUM2
-------------------------------------- ------------------- ------------------
ILLD/ELKJS/00000000/ELKJS/FHSH
ILLD/EFECTE/0116988-7-002/ADFA/ADFG
IIODK/1573230-0/2216755-7/ 1573230-0 2216755-7
IIODK/1573230-0/2216755-700/WRITE 1573230-0
IIODK/TEST/1573230-0/2216755-700/WRITE 1573230-0
1234567-8 1234567-8
1234567-8/9876543-2 1234567-8 9876543-2
1234567-8/TEST/9876543-2 1234567-8 9876543-2
将按分隔符拆分字符串的查询与 REGEXP_LIKE 和 PIVOT 相结合,得到的结果中每条记录最多可容纳 6 个数字。如果需要处理每条记录中更多的数字,则必须同时更新 cols 子查询和 PIVOT 列表(遗憾的是,列数无法在静态 SQL 中动态确定)。
-- Split each address on '/', keep only the segments shaped
-- "<digits>-<one digit>", and pivot them (in their original order) into the
-- columns NUM1..NUM6, one output row per id that has at least one match.
WITH mcte (id, addr) AS (
    SELECT 1, 'ILLD/ELKJS/00000000/ELKJS/FHSH' FROM dual
    UNION ALL
    SELECT 2, 'ILLD/EFECTE/0116988-7-002/ADFA/ADFG' FROM dual
    UNION ALL
    SELECT 3, 'IIODK/1573230-0/2216755-7/' FROM dual
    UNION ALL
    SELECT 4, '1-1/1573230-0/2216755-700/676-7' FROM dual
),
-- Row generator for the segment positions to inspect. The bound is inclusive
-- (<= 6) so that it agrees with the six columns of the PIVOT list below; the
-- previous "< 6" produced only positions 1..5, so a 6th segment was never
-- examined. Raise this bound and extend the PIVOT list together.
cols AS (
    SELECT LEVEL AS colnum
    FROM dual
    CONNECT BY LEVEL <= 6 /* (max) number of columns */
),
-- One row per (id, position) holding the non-empty segment at that position.
mcte2 AS (
    SELECT
        m.id,
        c.colnum,
        REGEXP_SUBSTR(m.addr, '[^/]+', 1, c.colnum) AS addr
    FROM mcte m
    CROSS JOIN cols c
    WHERE REGEXP_SUBSTR(m.addr, '[^/]+', 1, c.colnum) IS NOT NULL
),
-- Keep only well-formed numbers and renumber them 1..n per id, so the pivot
-- columns are filled left to right without gaps.
mcte3 AS (
    SELECT
        id,
        ROW_NUMBER() OVER (PARTITION BY id ORDER BY colnum) AS col_no,
        addr
    FROM mcte2
    WHERE REGEXP_LIKE(addr, '^[0-9]+-[0-9]$')
)
SELECT
    id,
    num1,
    num2,
    num3,
    num4,
    num5,
    num6
FROM mcte3
PIVOT (
    MAX(addr) FOR (col_no) IN (
        1 AS "NUM1",
        2 AS "NUM2",
        3 AS "NUM3",
        4 AS "NUM4",
        5 AS "NUM5",
        6 AS "NUM6"
    )
)
ORDER BY id;
这给出了结果
ID NUM1 NUM2 NUM3 NUM4 NUM5 NUM6
---------- ---------- ---------- ---------- ---------- ---------- ----------
3 1573230-0 2216755-7
4 1-1 1573230-0 676-7
我有一个这样的数据集(见下文),我尝试提取格式为 {variable_number_of_digits}{hyphen}{only_one_digit}:
的数字
-- Asker's original attempt, reproduced as posted. It does NOT return the
-- desired result: the bracket class followed by "+" accepts any run of its
-- characters, not exactly "<digits>-<one digit>", so e.g. the first row
-- yields 00000000 for NUM1 instead of NULL.
WITH mcte (addr) AS (
    SELECT 'ILLD/ELKJS/00000000/ELKJS/FHSH' FROM dual
    UNION ALL
    SELECT 'ILLD/EFECTE/0116988-7-002/ADFA/ADFG' FROM dual
    UNION ALL
    SELECT 'IIODK/1573230-0/2216755-7/' FROM dual
    UNION ALL
    SELECT 'IIODK/1573230-0/2216755-700/WRITE' FROM dual
)
SELECT
    addr,
    -- Capture group 2: the first run of class characters found after a slash.
    REGEXP_SUBSTR(addr, '(\/)([0-9-]+)', 1, 1, NULL, 2) AS num1,
    -- Capture group 3: a run of class characters following one more segment.
    REGEXP_SUBSTR(addr, '(\/)([^\/]+\/)([0-9\-]+)', 1, 1, NULL, 3) AS num2
FROM mcte;
我没有得到正确的结果集,应该如下所示
+-------------------------------------+-----------+-----------+
| ADDR | NUM1 | NUM2 |
+-------------------------------------+-----------+-----------+
| ILLD/ELKJS/00000000/ELKJS/FHSH | NULL | NULL |
| ILLD/EFECTE/0116988-7-002/ADFA/ADFG | NULL | NULL |
| IIODK/1573230-0/2216755-7/ | 1573230-0 | 2216755-7 |
| IIODK/1573230-0/2216755-700/WRITE | 1573230-0 | NULL |
+-------------------------------------+-----------+-----------+
如何实现?
I try to extract digits which are in form {variable_number_of_digits}{hyphen}{only_one_digit}
要匹配这种格式的数字,您应该这样做。
正则表达式: \/\d+-\d
如果您想从第二个和第三个以 / 分隔的片段中获取结果,则:
-- Position-based extraction: NUM1 comes from the 2nd slash-delimited segment
-- and NUM2 from the 3rd. A segment is returned only when it is exactly
-- "<digits>-<one digit>" and is immediately followed by a slash.
WITH mcte (addr) AS (
    SELECT 'ILLD/ELKJS/00000000/ELKJS/FHSH' FROM dual
    UNION ALL
    SELECT 'ILLD/EFECTE/0116988-7-002/ADFA/ADFG' FROM dual
    UNION ALL
    SELECT 'IIODK/1573230-0/2216755-7/' FROM dual
    UNION ALL
    SELECT 'IIODK/1573230-0/2216755-700/WRITE' FROM dual
    UNION ALL
    SELECT 'IIODK/TEST/1573230-0/2216755-700/WRITE' FROM dual
)
SELECT
    addr,
    -- Skip one leading segment from the start of the string, return group 1.
    REGEXP_SUBSTR(addr, '^[^/]*/(\d+-\d)/', 1, 1, NULL, 1) AS num1,
    -- Skip two leading segments from the start of the string, return group 1.
    REGEXP_SUBSTR(addr, '^[^/]*/[^/]*/(\d+-\d)/', 1, 1, NULL, 1) AS num2
FROM mcte;
输出:
ADDR NUM1 NUM2
-------------------------------------- ------------------- -------------------
ILLD/ELKJS/00000000/ELKJS/FHSH
ILLD/EFECTE/0116988-7-002/ADFA/ADFG
IIODK/1573230-0/2216755-7/ 1573230-0 2216755-7
IIODK/1573230-0/2216755-700/WRITE 1573230-0
IIODK/TEST/1573230-0/2216755-700/WRITE 1573230-0
更新:
如果您只想取出第一个和第二个符合该格式的片段,而不关心它们在字符串中的位置,那么:
-- Position-independent extraction: NUM1 is the first segment shaped
-- "<digits>-<one digit>" anywhere in the string, NUM2 is a later segment of
-- the same shape. A segment is delimited by the string start or a slash on
-- the left and by a slash or the string end on the right.
WITH mcte (addr) AS (
    SELECT 'ILLD/ELKJS/00000000/ELKJS/FHSH' FROM dual
    UNION ALL
    SELECT 'ILLD/EFECTE/0116988-7-002/ADFA/ADFG' FROM dual
    UNION ALL
    SELECT 'IIODK/1573230-0/2216755-7/' FROM dual
    UNION ALL
    SELECT 'IIODK/1573230-0/2216755-700/WRITE' FROM dual
    UNION ALL
    SELECT 'IIODK/TEST/1573230-0/2216755-700/WRITE' FROM dual
    UNION ALL
    SELECT '1234567-8' FROM dual
    UNION ALL
    SELECT '1234567-8/9876543-2' FROM dual
    UNION ALL
    SELECT '1234567-8/TEST/9876543-2' FROM dual
)
SELECT
    addr,
    -- Group 2 is the delimited number itself.
    REGEXP_SUBSTR(addr, '(^|/)(\d+-\d)(/|$)', 1, 1, NULL, 2) AS num1,
    -- A first number, optionally followed by intervening text (non-greedy),
    -- then group 3: another delimited number.
    REGEXP_SUBSTR(addr, '(^|/)\d+-\d(/.+?)?/(\d+-\d)(/|$)', 1, 1, NULL, 3) AS num2
FROM mcte;
输出:
ADDR NUM1 NUM2
-------------------------------------- ------------------- ------------------
ILLD/ELKJS/00000000/ELKJS/FHSH
ILLD/EFECTE/0116988-7-002/ADFA/ADFG
IIODK/1573230-0/2216755-7/ 1573230-0 2216755-7
IIODK/1573230-0/2216755-700/WRITE 1573230-0
IIODK/TEST/1573230-0/2216755-700/WRITE 1573230-0
1234567-8 1234567-8
1234567-8/9876543-2 1234567-8 9876543-2
1234567-8/TEST/9876543-2 1234567-8 9876543-2
将按分隔符拆分字符串的查询与 REGEXP_LIKE 和 PIVOT 相结合,得到的结果中每条记录最多可容纳 6 个数字。如果需要处理每条记录中更多的数字,则必须同时更新 cols 子查询和 PIVOT 列表(遗憾的是,列数无法在静态 SQL 中动态确定)。
-- Split each address on '/', keep only the segments shaped
-- "<digits>-<one digit>", and pivot them (in their original order) into the
-- columns NUM1..NUM6, one output row per id that has at least one match.
WITH mcte (id, addr) AS (
    SELECT 1, 'ILLD/ELKJS/00000000/ELKJS/FHSH' FROM dual
    UNION ALL
    SELECT 2, 'ILLD/EFECTE/0116988-7-002/ADFA/ADFG' FROM dual
    UNION ALL
    SELECT 3, 'IIODK/1573230-0/2216755-7/' FROM dual
    UNION ALL
    SELECT 4, '1-1/1573230-0/2216755-700/676-7' FROM dual
),
-- Row generator for the segment positions to inspect. The bound is inclusive
-- (<= 6) so that it agrees with the six columns of the PIVOT list below; the
-- previous "< 6" produced only positions 1..5, so a 6th segment was never
-- examined. Raise this bound and extend the PIVOT list together.
cols AS (
    SELECT LEVEL AS colnum
    FROM dual
    CONNECT BY LEVEL <= 6 /* (max) number of columns */
),
-- One row per (id, position) holding the non-empty segment at that position.
mcte2 AS (
    SELECT
        m.id,
        c.colnum,
        REGEXP_SUBSTR(m.addr, '[^/]+', 1, c.colnum) AS addr
    FROM mcte m
    CROSS JOIN cols c
    WHERE REGEXP_SUBSTR(m.addr, '[^/]+', 1, c.colnum) IS NOT NULL
),
-- Keep only well-formed numbers and renumber them 1..n per id, so the pivot
-- columns are filled left to right without gaps.
mcte3 AS (
    SELECT
        id,
        ROW_NUMBER() OVER (PARTITION BY id ORDER BY colnum) AS col_no,
        addr
    FROM mcte2
    WHERE REGEXP_LIKE(addr, '^[0-9]+-[0-9]$')
)
SELECT
    id,
    num1,
    num2,
    num3,
    num4,
    num5,
    num6
FROM mcte3
PIVOT (
    MAX(addr) FOR (col_no) IN (
        1 AS "NUM1",
        2 AS "NUM2",
        3 AS "NUM3",
        4 AS "NUM4",
        5 AS "NUM5",
        6 AS "NUM6"
    )
)
ORDER BY id;
这给出了结果
ID NUM1 NUM2 NUM3 NUM4 NUM5 NUM6
---------- ---------- ---------- ---------- ---------- ---------- ----------
3 1573230-0 2216755-7
4 1-1 1573230-0 676-7