REGEXP 捕获由一组定界符定界的值
REGEXP to capture values delimited by a set of delimiters
我的列值如下所示:[只是我创建的一个示例]
{BASICINFOxxxFyyy100x} {CONTACTxxx12345yyy20202x}
它可以包含 0 个或多个数据块...我创建了以下查询来拆分块
with x as
(select
'{BASICINFOxxxFyyy100x}{CONTACTxxx12345yyy20202x}' a from dual)
select REGEXP_SUBSTR(a,'({.*?x})',1,rownum,null,1)
from x
connect by rownum <= REGEXP_COUNT(a,'x}')
但是我想进一步将输出分成 3 列,如下所示:
ColumnA | ColumnB | ColumnC
------------------------------
BASICINFO | F |100
CONTACT | 12345 |20202
分隔符始终是标准的。我未能创建一个漂亮的查询来提供我想要的输出。
提前致谢。
SQL> with x as
2 (select '{BASICINFOxxxFyyy100x}{CONTACTxxx12345yyy20202x}' a from dual
3 ),
4 y as (
5 select REGEXP_SUBSTR(a,'({.*?x})',1,rownum,null,1) c1
6 from x
7 connect by rownum <= REGEXP_COUNT(a,'x}')
8 )
9 select
10 substr(c1,2,instr(c1,'xxx')-2) z1,
11 substr(c1,instr(c1,'xxx')+3,instr(c1,'yyy')-instr(c1,'xxx')-3) z2,
12 rtrim(substr(c1,instr(c1,'yyy')+3),'x}') z3
13 from y;
Z1 Z2 Z3
--------------- --------------- ---------------
BASICINFO F 100
CONTACT 12345 20202
Oracle 11g R2 架构设置:
CREATE TABLE your_table ( str ) AS
SELECT '{BASICINFOxxxFyyy100x}{CONTACTxxx12345yyy20202x}' from dual
/
查询 1:
select REGEXP_SUBSTR(
t.str,
'\{([^}]*?)xxx([^}]*?)yyy([^}]*?)x\}',
1,
l.COLUMN_VALUE,
NULL,
1
) AS col1,
REGEXP_SUBSTR(
str,
'\{([^}]*?)xxx([^}]*?)yyy([^}]*?)x\}',
1,
l.COLUMN_VALUE,
NULL,
2
) AS col2,
REGEXP_SUBSTR(
str,
'\{([^}]*?)xxx([^}]*?)yyy([^}]*?)x\}',
1,
l.COLUMN_VALUE,
NULL,
3
) AS col3
FROM your_table t
CROSS JOIN
TABLE(
CAST(
MULTISET(
SELECT LEVEL
FROM DUAL
CONNECT BY LEVEL <= REGEXP_COUNT( t.str,'\{([^}]*?)xxx([^}]*?)yyy([^}]*?)x\}')
) AS SYS.ODCINUMBERLIST
)
) l
| COL1 | COL2 | COL3 |
|-----------|-------|-------|
| BASICINFO | F | 100 |
| CONTACT | 12345 | 20202 |
注:
您的查询:
select REGEXP_SUBSTR(a,'({.*?x})',1,rownum,null,1)
from x
connect by rownum <= REGEXP_COUNT(a,'x}')
当您有多行输入时将不起作用 - 在 CONNECT BY
子句中,分层查询没有任何限制将 Row1-Level2 连接到 Row1-Level1 或 Row2-Level1,因此它将连接它对两者都适用,并且随着层次结构的深度变大,它将创建成倍增加的输出行副本。您可以使用一些 hack 来阻止这种情况,但是将行生成器放入相关的子查询中会更有效,然后可以 CROSS JOIN
ed 回到原始的 table(它是相关的所以它不会连接到错误的行)如果你要使用分层查询。
更好的办法是修复您的数据结构,这样您就不会在分隔字符串中存储多个值。
这是另一种解决方案,它是从您离开的地方导出的。您的查询已经导致将一行拆分为 2 行。下面将分为 3 列:
WITH x
AS (SELECT '{BASICINFOxxxFyyy100x}{CONTACTxxx12345yyy20202x}' a
FROM DUAL),
-- Your query result here
tbl
AS ( SELECT REGEXP_SUBSTR (a,
'({.*?x})',
1,
ROWNUM,
NULL,
1)
Col
FROM x
CONNECT BY ROWNUM <= REGEXP_COUNT (a, 'x}'))
--- Actual Query
SELECT col,
REGEXP_SUBSTR (col,
'(.*?{)([^x]+)',
1,
1,
'',
2)
AS COL1,
REGEXP_SUBSTR (REGEXP_SUBSTR (col,
'(.*?)([^x]+)',
1,
2,
'',
2),
'[^y]+',
1,
1)
AS COL2,
REGEXP_SUBSTR (REGEXP_SUBSTR (col,
'[^y]+x',
1,
2),
'[^x]+',
1,
1)
AS COL3
FROM tbl;
输出:
SQL> /
COL COL1 COL2 COL3
------------------------------------------------ ------------------------------------------------ ------------------------------------------------ ------------------------------------------------
{BASICINFOxxxFyyy100x} BASICINFO F 100
{CONTACTxxx12345yyy20202x} CONTACT 12345 20202
我的列值如下所示:[只是我创建的一个示例]
{BASICINFOxxxFyyy100x} {CONTACTxxx12345yyy20202x}
它可以包含 0 个或多个数据块...我创建了以下查询来拆分块
with x as
(select
'{BASICINFOxxxFyyy100x}{CONTACTxxx12345yyy20202x}' a from dual)
select REGEXP_SUBSTR(a,'({.*?x})',1,rownum,null,1)
from x
connect by rownum <= REGEXP_COUNT(a,'x}')
但是我想进一步将输出分成 3 列,如下所示:
ColumnA | ColumnB | ColumnC
------------------------------
BASICINFO | F |100
CONTACT | 12345 |20202
分隔符始终是标准的。我未能创建一个漂亮的查询来提供我想要的输出。
提前致谢。
SQL> with x as
2 (select '{BASICINFOxxxFyyy100x}{CONTACTxxx12345yyy20202x}' a from dual
3 ),
4 y as (
5 select REGEXP_SUBSTR(a,'({.*?x})',1,rownum,null,1) c1
6 from x
7 connect by rownum <= REGEXP_COUNT(a,'x}')
8 )
9 select
10 substr(c1,2,instr(c1,'xxx')-2) z1,
11 substr(c1,instr(c1,'xxx')+3,instr(c1,'yyy')-instr(c1,'xxx')-3) z2,
12 rtrim(substr(c1,instr(c1,'yyy')+3),'x}') z3
13 from y;
Z1 Z2 Z3
--------------- --------------- ---------------
BASICINFO F 100
CONTACT 12345 20202
Oracle 11g R2 架构设置:
CREATE TABLE your_table ( str ) AS
SELECT '{BASICINFOxxxFyyy100x}{CONTACTxxx12345yyy20202x}' from dual
/
查询 1:
select REGEXP_SUBSTR(
t.str,
'\{([^}]*?)xxx([^}]*?)yyy([^}]*?)x\}',
1,
l.COLUMN_VALUE,
NULL,
1
) AS col1,
REGEXP_SUBSTR(
str,
'\{([^}]*?)xxx([^}]*?)yyy([^}]*?)x\}',
1,
l.COLUMN_VALUE,
NULL,
2
) AS col2,
REGEXP_SUBSTR(
str,
'\{([^}]*?)xxx([^}]*?)yyy([^}]*?)x\}',
1,
l.COLUMN_VALUE,
NULL,
3
) AS col3
FROM your_table t
CROSS JOIN
TABLE(
CAST(
MULTISET(
SELECT LEVEL
FROM DUAL
CONNECT BY LEVEL <= REGEXP_COUNT( t.str,'\{([^}]*?)xxx([^}]*?)yyy([^}]*?)x\}')
) AS SYS.ODCINUMBERLIST
)
) l
| COL1 | COL2 | COL3 |
|-----------|-------|-------|
| BASICINFO | F | 100 |
| CONTACT | 12345 | 20202 |
注:
您的查询:
select REGEXP_SUBSTR(a,'({.*?x})',1,rownum,null,1)
from x
connect by rownum <= REGEXP_COUNT(a,'x}')
当您有多行输入时将不起作用 - 在 CONNECT BY
子句中,分层查询没有任何限制将 Row1-Level2 连接到 Row1-Level1 或 Row2-Level1,因此它将连接它对两者都适用,并且随着层次结构的深度变大,它将创建成倍增加的输出行副本。您可以使用一些 hack 来阻止这种情况,但是将行生成器放入相关的子查询中会更有效,然后可以 CROSS JOIN
ed 回到原始的 table(它是相关的所以它不会连接到错误的行)如果你要使用分层查询。
更好的办法是修复您的数据结构,这样您就不会在分隔字符串中存储多个值。
这是另一种解决方案,它是从您离开的地方导出的。您的查询已经导致将一行拆分为 2 行。下面将分为 3 列:
WITH x
AS (SELECT '{BASICINFOxxxFyyy100x}{CONTACTxxx12345yyy20202x}' a
FROM DUAL),
-- Your query result here
tbl
AS ( SELECT REGEXP_SUBSTR (a,
'({.*?x})',
1,
ROWNUM,
NULL,
1)
Col
FROM x
CONNECT BY ROWNUM <= REGEXP_COUNT (a, 'x}'))
--- Actual Query
SELECT col,
REGEXP_SUBSTR (col,
'(.*?{)([^x]+)',
1,
1,
'',
2)
AS COL1,
REGEXP_SUBSTR (REGEXP_SUBSTR (col,
'(.*?)([^x]+)',
1,
2,
'',
2),
'[^y]+',
1,
1)
AS COL2,
REGEXP_SUBSTR (REGEXP_SUBSTR (col,
'[^y]+x',
1,
2),
'[^x]+',
1,
1)
AS COL3
FROM tbl;
输出:
SQL> /
COL COL1 COL2 COL3
------------------------------------------------ ------------------------------------------------ ------------------------------------------------ ------------------------------------------------
{BASICINFOxxxFyyy100x} BASICINFO F 100
{CONTACTxxx12345yyy20202x} CONTACT 12345 20202