Oracle REGEXP_SUBSTR 不会匹配点字符
Oracle REGEXP_SUBSTR will not match the dot character
我正在尝试从以下字符串中提取信息:
FOO-BAR-AUDIT-DATABASE.NUPKG
FOO.BAR.DATABASE-2.0.0.NUPKG
信息如下:
'FOO.BAR.DATABASE' '2.0.0'
| |
module_name version
目前,当 module_name 部分包含 `.` 字符时,我无法正确解析。请参阅下面的表格。
下面的例子展示了我是如何提取信息的。
正则表达式的第一个捕获组 `(.*?)` 无法按预期工作,其余捕获组用于处理版本信息的各种情况。
-- Self-checking test query: parses each package file name with one regex and
-- compares the extracted module name against the expected value.
with samples as (
    -- package file names together with the module name we expect to extract
    select 'FUNKY_LOG_DATABASE-1.0.0.NUPKG' as pck, 'funky_log_database' as expected from dual
    union select 'FOO.BAR.DATABASE-2.0.0.NUPKG', 'foo.bar.database' from dual
    union select 'FOO-BAR-AUDIT-DATABASE.NUPKG', 'foo-bar-audit-database' from dual
    union select 'funk-database-1.nupkg', 'funk-database' from dual
    union select 'funk-database-1.2.nupkg', 'funk-database' from dual
    union select 'baz-database-1.0.1.nupkg', 'baz-database' from dual
),
pattern as (
    -- group 1: module name (lazy); group 2: optional "-major[.minor[.patch]]";
    -- groups 3 / 5 / 7: major / minor / patch digits; group 8: the rest from a dot.
    -- NOTE: group 2 is optional, so the lazy group 1 can stop at the FIRST dot
    -- and let group 8 swallow everything after it (dotted module names get cut).
    select '(.*?)(-(\d+)(\.(\d+))?(\.(\d+))?)?(\..*)' as regex from dual
),
parsed as (
    select lower(regexp_substr(s.pck, p.regex, 1, 1, '', 1))     as module_name,
           s.expected,
           to_number(regexp_substr(s.pck, p.regex, 1, 1, '', 3)) as major,
           to_number(regexp_substr(s.pck, p.regex, 1, 1, '', 5)) as minor,
           to_number(regexp_substr(s.pck, p.regex, 1, 1, '', 7)) as patch,
           s.pck                                                 as package_name
    from samples s
    cross join pattern p
)
select case when r.module_name = r.expected then 'pass' else 'fail' end as test,
       r.*
from parsed r;
上面的查询产生以下结果 (Oracle 19c):
| test | module_name | expected | major | minor | patch | package_name |
|---|---|---|---|---|---|---|
| pass | foo-bar-audit-database | foo-bar-audit-database | | | | FOO-BAR-AUDIT-DATABASE.NUPKG |
| fail | foo | foo.bar.database | | | | FOO.BAR.DATABASE-2.0.0.NUPKG |
| pass | funky_log_database | funky_log_database | 1 | 0 | 0 | FUNKY_LOG_DATABASE-1.0.0.NUPKG |
| pass | baz-database | baz-database | 1 | 0 | 1 | baz-database-1.0.1.nupkg |
| pass | funk-database | funk-database | 1 | 2 | | funk-database-1.2.nupkg |
| pass | funk-database | funk-database | 1 | | | funk-database-1.nupkg |
我试过使用 `([[:alnum:]._-]*?)` 作为第一个捕获组,但结果相同。改用贪婪 (greedy) 匹配又会匹配过多的内容。
有什么好的建议吗?
这样可以吗?它并不复杂,但能返回你想要的数据(至少,我是这么认为的)。
- 第 1 - 8 行 - 样本数据
- `temp` CTE:为简单起见,删除扩展名 (.nupkg)
- 最终查询:
  - 第 18 行是 module name;如果其中包含数字,则截取第一个数字之前的子串,否则取整个 PCK 值
  - 第 20 - 22 行返回 version:如果没有数字,返回 NULL;否则返回从第一个数字开始的子串
SQL> with
2 test as
3 (select 'FUNKY_LOG_DATABASE-1.0.0.NUPKG' as pck, 'funky_log_database' as expected from dual
4 union select 'FOO.BAR.DATABASE-2.0.0.NUPKG', 'foo.bar.database' from dual
5 union select 'FOO-BAR-AUDIT-DATABASE.NUPKG', 'foo-bar-audit-database' from dual
6 union select 'funk-database-1.nupkg', 'funk-database' from dual
7 union select 'funk-database-1.2.nupkg', 'funk-database' from dual
8 union select 'baz-database-1.0.1.nupkg', 'baz-database' from dual),
9 temp as
10 -- lower-case pck and strip the '.nupkg' extension (replace() removes every occurrence of it); the original value is kept as pck_old
11 (select pck pck_old, expected,
12 replace(lower(pck), '.nupkg', '') pck
13 from test
14 )
15 select pck_old,
16 expected,
17 -- module name: characters 1 .. (position of first digit - 2), i.e. the text before the separator preceding the first digit; with no digit substr() yields NULL and nvl() falls back to the whole pck. NOTE: a digit inside the module name itself would cut the name short
18 nvl(substr(pck, 1, regexp_instr(pck, '\d') - 2), pck) module_name,
19 -- version: NULL when pck contains no digit, otherwise everything from the first digit to the end of pck
20 case when regexp_instr(pck, '\d') = 0 then null
21 else substr(pck, regexp_instr(pck, '\d'))
22 end version
23 from temp;
PCK_OLD EXPECTED MODULE_NAME VERSION
------------------------------ ---------------------- ----------------------- --------
FOO-BAR-AUDIT-DATABASE.NUPKG foo-bar-audit-database foo-bar-audit-database
FOO.BAR.DATABASE-2.0.0.NUPKG foo.bar.database foo.bar.database 2.0.0
FUNKY_LOG_DATABASE-1.0.0.NUPKG funky_log_database funky_log_database 1.0.0
baz-database-1.0.1.nupkg baz-database baz-database 1.0.1
funk-database-1.2.nupkg funk-database funk-database 1.2
funk-database-1.nupkg funk-database funk-database 1
6 rows selected.
SQL>
可以从最后匹配得到版本,然后提取版本前的子串得到模块名:
-- Self-checking test query that anchors the pattern at the END of the file name:
-- the regex matches the optional "-major[.minor[.patch]]" suffix plus the final
-- extension, and the module name is whatever precedes the start of that match.
with samples as (
    -- package file names together with the module name we expect to extract
    select 'FUNKY_LOG_DATABASE-1.0.0.NUPKG' as pck, 'funky_log_database' as expected from dual
    union select 'FOO.BAR.DATABASE-2.0.0.NUPKG', 'foo.bar.database' from dual
    union select 'FOO-BAR-AUDIT-DATABASE.NUPKG', 'foo-bar-audit-database' from dual
    union select 'funk-database-1.nupkg', 'funk-database' from dual
    union select 'funk-database-1.2.nupkg', 'funk-database' from dual
    union select 'baz-database-1.0.1.nupkg', 'baz-database' from dual
),
pattern as (
    -- group 1: the whole optional version suffix; groups 2 / 3 / 4: major / minor / patch;
    -- \.[^.]+$ is the last dot-delimited segment of the name (the extension)
    select '(-(\d+)\.?(\d+)?\.?(\d+)?)?\.[^.]+$' as regex from dual
),
parsed as (
    select lower(substr(s.pck, 1, regexp_instr(s.pck, p.regex) - 1)) as module_name,
           s.expected,
           to_number(regexp_substr(s.pck, p.regex, 1, 1, '', 2))     as major,
           to_number(regexp_substr(s.pck, p.regex, 1, 1, '', 3))     as minor,
           to_number(regexp_substr(s.pck, p.regex, 1, 1, '', 4))     as patch,
           s.pck                                                     as package_name
    from samples s
    cross join pattern p
)
select case when r.module_name = r.expected then 'pass' else 'fail' end as test,
       r.*
from parsed r;
输出:
| TEST | MODULE_NAME | EXPECTED | MAJOR | MINOR | PATCH | PACKAGE_NAME |
|---|---|---|---|---|---|---|
| pass | foo-bar-audit-database | foo-bar-audit-database | | | | FOO-BAR-AUDIT-DATABASE.NUPKG |
| pass | foo.bar.database | foo.bar.database | 2 | 0 | 0 | FOO.BAR.DATABASE-2.0.0.NUPKG |
| pass | funky_log_database | funky_log_database | 1 | 0 | 0 | FUNKY_LOG_DATABASE-1.0.0.NUPKG |
| pass | baz-database | baz-database | 1 | 0 | 1 | baz-database-1.0.1.nupkg |
| pass | funk-database | funk-database | 1 | 2 | | funk-database-1.2.nupkg |
| pass | funk-database | funk-database | 1 | | | funk-database-1.nupkg |
db<>fiddle here
我正在尝试从以下字符串中提取信息:
FOO-BAR-AUDIT-DATABASE.NUPKG
FOO.BAR.DATABASE-2.0.0.NUPKG
信息如下:
'FOO.BAR.DATABASE' '2.0.0'
| |
module_name version
目前,当 module_name 部分包含 `.` 字符时,我无法正确解析。请参阅下面的表格。
下面的例子展示了我是如何提取信息的。
正则表达式的第一个捕获组 `(.*?)` 无法按预期工作,其余捕获组用于处理版本信息的各种情况。
-- Self-checking test query: parses each package file name with one regex and
-- compares the extracted module name against the expected value.
with samples as (
    -- package file names together with the module name we expect to extract
    select 'FUNKY_LOG_DATABASE-1.0.0.NUPKG' as pck, 'funky_log_database' as expected from dual
    union select 'FOO.BAR.DATABASE-2.0.0.NUPKG', 'foo.bar.database' from dual
    union select 'FOO-BAR-AUDIT-DATABASE.NUPKG', 'foo-bar-audit-database' from dual
    union select 'funk-database-1.nupkg', 'funk-database' from dual
    union select 'funk-database-1.2.nupkg', 'funk-database' from dual
    union select 'baz-database-1.0.1.nupkg', 'baz-database' from dual
),
pattern as (
    -- group 1: module name (lazy); group 2: optional "-major[.minor[.patch]]";
    -- groups 3 / 5 / 7: major / minor / patch digits; group 8: the rest from a dot.
    -- NOTE: group 2 is optional, so the lazy group 1 can stop at the FIRST dot
    -- and let group 8 swallow everything after it (dotted module names get cut).
    select '(.*?)(-(\d+)(\.(\d+))?(\.(\d+))?)?(\..*)' as regex from dual
),
parsed as (
    select lower(regexp_substr(s.pck, p.regex, 1, 1, '', 1))     as module_name,
           s.expected,
           to_number(regexp_substr(s.pck, p.regex, 1, 1, '', 3)) as major,
           to_number(regexp_substr(s.pck, p.regex, 1, 1, '', 5)) as minor,
           to_number(regexp_substr(s.pck, p.regex, 1, 1, '', 7)) as patch,
           s.pck                                                 as package_name
    from samples s
    cross join pattern p
)
select case when r.module_name = r.expected then 'pass' else 'fail' end as test,
       r.*
from parsed r;
上面的查询产生以下结果 (Oracle 19c):
test | module_name | expected | major | minor | patch | package_name |
---|---|---|---|---|---|---|
pass | foo-bar-audit-database | foo-bar-audit-database | | | | FOO-BAR-AUDIT-DATABASE.NUPKG |
fail | foo | foo.bar.database | | | | FOO.BAR.DATABASE-2.0.0.NUPKG |
pass | funky_log_database | funky_log_database | 1 | 0 | 0 | FUNKY_LOG_DATABASE-1.0.0.NUPKG |
pass | baz-database | baz-database | 1 | 0 | 1 | baz-database-1.0.1.nupkg |
pass | funk-database | funk-database | 1 | 2 | | funk-database-1.2.nupkg |
pass | funk-database | funk-database | 1 | | | funk-database-1.nupkg |
我试过使用 `([[:alnum:]._-]*?)` 作为第一个捕获组,但结果相同。改用贪婪 (greedy) 匹配又会匹配过多的内容。
有什么好的建议吗?
这样可以吗?它并不复杂,但能返回你想要的数据(至少,我是这么认为的)。
- 第 1 - 8 行 - 样本数据
- `temp` CTE:为简单起见,删除扩展名 (.nupkg)
- 最终查询:
  - 第 18 行是 module name;如果其中包含数字,则截取第一个数字之前的子串,否则取整个 PCK 值
  - 第 20 - 22 行返回 version:如果没有数字,返回 NULL;否则返回从第一个数字开始的子串
SQL> with
2 test as
3 (select 'FUNKY_LOG_DATABASE-1.0.0.NUPKG' as pck, 'funky_log_database' as expected from dual
4 union select 'FOO.BAR.DATABASE-2.0.0.NUPKG', 'foo.bar.database' from dual
5 union select 'FOO-BAR-AUDIT-DATABASE.NUPKG', 'foo-bar-audit-database' from dual
6 union select 'funk-database-1.nupkg', 'funk-database' from dual
7 union select 'funk-database-1.2.nupkg', 'funk-database' from dual
8 union select 'baz-database-1.0.1.nupkg', 'baz-database' from dual),
9 temp as
10 -- lower-case pck and strip the '.nupkg' extension (replace() removes every occurrence of it); the original value is kept as pck_old
11 (select pck pck_old, expected,
12 replace(lower(pck), '.nupkg', '') pck
13 from test
14 )
15 select pck_old,
16 expected,
17 -- module name: characters 1 .. (position of first digit - 2), i.e. the text before the separator preceding the first digit; with no digit substr() yields NULL and nvl() falls back to the whole pck. NOTE: a digit inside the module name itself would cut the name short
18 nvl(substr(pck, 1, regexp_instr(pck, '\d') - 2), pck) module_name,
19 -- version: NULL when pck contains no digit, otherwise everything from the first digit to the end of pck
20 case when regexp_instr(pck, '\d') = 0 then null
21 else substr(pck, regexp_instr(pck, '\d'))
22 end version
23 from temp;
PCK_OLD EXPECTED MODULE_NAME VERSION
------------------------------ ---------------------- ----------------------- --------
FOO-BAR-AUDIT-DATABASE.NUPKG foo-bar-audit-database foo-bar-audit-database
FOO.BAR.DATABASE-2.0.0.NUPKG foo.bar.database foo.bar.database 2.0.0
FUNKY_LOG_DATABASE-1.0.0.NUPKG funky_log_database funky_log_database 1.0.0
baz-database-1.0.1.nupkg baz-database baz-database 1.0.1
funk-database-1.2.nupkg funk-database funk-database 1.2
funk-database-1.nupkg funk-database funk-database 1
6 rows selected.
SQL>
可以从最后匹配得到版本,然后提取版本前的子串得到模块名:
-- Self-checking test query that anchors the pattern at the END of the file name:
-- the regex matches the optional "-major[.minor[.patch]]" suffix plus the final
-- extension, and the module name is whatever precedes the start of that match.
with samples as (
    -- package file names together with the module name we expect to extract
    select 'FUNKY_LOG_DATABASE-1.0.0.NUPKG' as pck, 'funky_log_database' as expected from dual
    union select 'FOO.BAR.DATABASE-2.0.0.NUPKG', 'foo.bar.database' from dual
    union select 'FOO-BAR-AUDIT-DATABASE.NUPKG', 'foo-bar-audit-database' from dual
    union select 'funk-database-1.nupkg', 'funk-database' from dual
    union select 'funk-database-1.2.nupkg', 'funk-database' from dual
    union select 'baz-database-1.0.1.nupkg', 'baz-database' from dual
),
pattern as (
    -- group 1: the whole optional version suffix; groups 2 / 3 / 4: major / minor / patch;
    -- \.[^.]+$ is the last dot-delimited segment of the name (the extension)
    select '(-(\d+)\.?(\d+)?\.?(\d+)?)?\.[^.]+$' as regex from dual
),
parsed as (
    select lower(substr(s.pck, 1, regexp_instr(s.pck, p.regex) - 1)) as module_name,
           s.expected,
           to_number(regexp_substr(s.pck, p.regex, 1, 1, '', 2))     as major,
           to_number(regexp_substr(s.pck, p.regex, 1, 1, '', 3))     as minor,
           to_number(regexp_substr(s.pck, p.regex, 1, 1, '', 4))     as patch,
           s.pck                                                     as package_name
    from samples s
    cross join pattern p
)
select case when r.module_name = r.expected then 'pass' else 'fail' end as test,
       r.*
from parsed r;
输出:
| TEST | MODULE_NAME | EXPECTED | MAJOR | MINOR | PATCH | PACKAGE_NAME |
|---|---|---|---|---|---|---|
| pass | foo-bar-audit-database | foo-bar-audit-database | | | | FOO-BAR-AUDIT-DATABASE.NUPKG |
| pass | foo.bar.database | foo.bar.database | 2 | 0 | 0 | FOO.BAR.DATABASE-2.0.0.NUPKG |
| pass | funky_log_database | funky_log_database | 1 | 0 | 0 | FUNKY_LOG_DATABASE-1.0.0.NUPKG |
| pass | baz-database | baz-database | 1 | 0 | 1 | baz-database-1.0.1.nupkg |
| pass | funk-database | funk-database | 1 | 2 | | funk-database-1.2.nupkg |
| pass | funk-database | funk-database | 1 | | | funk-database-1.nupkg |
db<>fiddle here