提取 CLOB 数据以进行插入
Extract CLOB data for insert
我有这样的 CLOB 数据:
123456 (LED TV); 234543 (LED light); 654876 (LED monitor);
现在我需要按定界符(在我的例子中是“;”)拆分这些数据,并通过 SELECT 将其插入目标表,但仅当记录尚不存在时才插入。
我看过几个使用 Instr & Substr 或 Regexp 的例子,但它们要么不符合我的需要,要么超出了我对 Oracle 的理解。有人能给我一个例子,说明如何根据 CLOB 中的分隔字符串将 CLOB 拆分成行,以便稍后用于插入吗?
P.S.: 我更喜欢最快的解决方案,因为我的 CLOB 数据可能包含超过 500 万条发票记录。最后将是一个从 C# 触发的存储过程,但这部分让我很头疼...如有任何帮助 - 在此先感谢!
举个例子。
首先是测试用例:test 表包含源数据:
SQL> create table test (col clob);
Table created.
SQL> insert into test
2 select '123456 (LED TV); 234543 (LED light); 654876 (LED monitor);' from dual union all
3 select '665988 (Notebook); 987654 (Mouse); 445577 (Dead Pixel);' from dual;
2 rows created.
SQL>
target 表将包含从源数据中提取的值:
SQL> create table target (itn number, name varchar2(20));
Table created.
SQL> -- This value shouldn't be inserted as it already exists in the TARGET table:
SQL> insert into target values (234543, 'LED light');
1 row created.
SQL>
现在进入正题。思路是先将列值拆分为行(这正是分层查询中 regexp_substr 部分所做的),然后将 ID 值与名称(括在括号中)分开。target 表中已存在的值不应插入(因此该查询应该插入 5 行):
SQL> insert into target (itn, name)
2 with
3 c2r as
4 -- split column to rows, e.g. "123456 (LED TV)" is an example of such a row
5 (select to_char(trim(regexp_substr(col, '[^;]+', 1, column_value))) val
6 from test join table(cast(multiset(select level from dual
7 connect by level <= regexp_count(col, ';')
8 ) as sys.odcinumberlist)) on 1 = 1
9 ),
10 sep as
11 -- separate ITN (invoice tracking number) and NAME
12 (select substr(val, 1, instr(val, ' ') - 1) itn,
13 substr(val, instr(val, ' ') + 1) name
14 from c2r
15 )
16 select s.itn, replace(replace(s.name, '(', ''), ')', '')
17 from sep s
18 -- don't insert values that already exist in the TARGET table
19 where not exists (select null from target t
20 where t.itn = s.itn
21 );
5 rows created.
SQL>
最终结果:
SQL> select * From target;
ITN NAME
---------- --------------------
234543 LED light
123456 LED TV
654876 LED monitor
665988 Notebook
987654 Mouse
445577 Dead Pixel
6 rows selected.
SQL>
我尝试使用 DBMS_LOB 包来完成这个任务:先把 CLOB 转换为以 ";" 分隔的各个字符串,然后对它们进行一些字符串操作以得到结果。
请尝试以下语句:
-- Splits every ';'-terminated entry of INVOICES.INVOICE (a CLOB such as
-- '123456 (LED TV); 234543 (LED light);') into an invoice number and a name,
-- and inserts the pairs that are not yet present in INVOICE_CATEGORIZED.
--
-- Changes compared with the earlier version of this statement:
--   * explicit target column list, so the statement does not depend on the
--     column order/count of INVOICE_CATEGORIZED;
--   * the invoice number is the text before '(' instead of a fixed 6 characters;
--   * NULL entries (empty CLOB, CLOB without ';', or empty segments such as
--     ';;') are skipped instead of being inserted as (NULL, NULL) rows.
--
-- NOTE(review): text after the last ';' is still ignored; entries are expected
-- to be ';'-terminated. An entry without a '(name)' part yields a NULL name,
-- which never matches in the NOT EXISTS check and is therefore inserted on
-- every run -- confirm whether such entries can occur.
INSERT INTO INVOICE_CATEGORIZED (INVOICE_NUMBER, INVOICE_NAME)
WITH
ENTRIES AS (
    -- One row per ';'-terminated entry, returned as a trimmed string.
    SELECT
        TRIM(CASE WHEN INVOICE_SINGLE.COLUMN_VALUE = 1 THEN
                 -- First entry: everything before the first ';'.
                 dbms_lob.SUBSTR(INVOICE,
                                 dbms_lob.INSTR(INVOICE, ';', 1, INVOICE_SINGLE.COLUMN_VALUE) - 1,
                                 1)
             ELSE
                 -- N-th entry: the text between the (N-1)-th and the N-th ';'.
                 dbms_lob.SUBSTR(INVOICE,
                                 dbms_lob.INSTR(INVOICE, ';', 1, INVOICE_SINGLE.COLUMN_VALUE) - 1
                                 - dbms_lob.INSTR(INVOICE, ';', 1, INVOICE_SINGLE.COLUMN_VALUE - 1),
                                 dbms_lob.INSTR(INVOICE, ';', 1, INVOICE_SINGLE.COLUMN_VALUE - 1) + 1)
             END) AS INVOICE_INN
    FROM
        INVOICES T,
        -- Left-correlated row generator: numbers 1..N while an N-th ';' exists
        -- in this row's INVOICE. CONNECT BY always returns LEVEL 1, even when
        -- there is no ';' at all; that case produces a NULL entry which is
        -- filtered out below.
        TABLE(CAST(MULTISET(
            SELECT LEVEL
            FROM DUAL
            CONNECT BY dbms_lob.INSTR(INVOICE, ';', 1, LEVEL) <> 0
        ) AS SYS.ODCINUMBERLIST)) INVOICE_SINGLE
),
PARSED AS (
    -- INVOICE_INN is a plain string here, so ordinary string functions apply.
    SELECT
        -- Number = text before '(' (or the whole entry when there is no '(').
        TRIM(CASE WHEN INSTR(INVOICE_INN, '(') > 0
                  THEN SUBSTR(INVOICE_INN, 1, INSTR(INVOICE_INN, '(') - 1)
                  ELSE INVOICE_INN
             END) AS INVOICE_NUMBER,
        -- Name = text between the first '(' and the first ')'.
        SUBSTR(INVOICE_INN,
               INSTR(INVOICE_INN, '(') + 1,
               INSTR(INVOICE_INN, ')') - INSTR(INVOICE_INN, '(') - 1) AS INVOICE_NAME
    FROM ENTRIES
    WHERE INVOICE_INN IS NOT NULL
)
SELECT
    TAB.INVOICE_NUMBER,
    TAB.INVOICE_NAME
FROM PARSED TAB
WHERE TAB.INVOICE_NUMBER IS NOT NULL
  -- Insert only the pairs that are not stored yet.
  AND NOT EXISTS (SELECT 1
                  FROM INVOICE_CATEGORIZED IC
                  WHERE IC.INVOICE_NUMBER = TAB.INVOICE_NUMBER
                    AND IC.INVOICE_NAME = TAB.INVOICE_NAME)
干杯!!
我有这样的 CLOB 数据:
123456 (LED TV); 234543 (LED light); 654876 (LED monitor);
现在我需要按定界符(在我的例子中是“;”)拆分这些数据,并通过 SELECT 将其插入目标表,但仅当记录尚不存在时才插入。
我看过几个使用 Instr & Substr 或 Regexp 的例子,但它们要么不符合我的需要,要么超出了我对 Oracle 的理解。有人能给我一个例子,说明如何根据 CLOB 中的分隔字符串将 CLOB 拆分成行,以便稍后用于插入吗?
P.S.: 我更喜欢最快的解决方案,因为我的 CLOB 数据可能包含超过 500 万条发票记录。最后将是一个从 C# 触发的存储过程,但这部分让我很头疼...如有任何帮助 - 在此先感谢!
举个例子。
首先是测试用例:test 表包含源数据:
SQL> create table test (col clob);
Table created.
SQL> insert into test
2 select '123456 (LED TV); 234543 (LED light); 654876 (LED monitor);' from dual union all
3 select '665988 (Notebook); 987654 (Mouse); 445577 (Dead Pixel);' from dual;
2 rows created.
SQL>
target 表将包含从源数据中提取的值:
SQL> create table target (itn number, name varchar2(20));
Table created.
SQL> -- This value shouldn't be inserted as it already exists in the TARGET table:
SQL> insert into target values (234543, 'LED light');
1 row created.
SQL>
现在进入正题。思路是先将列值拆分为行(这正是分层查询中 regexp_substr 部分所做的),然后将 ID 值与名称(括在括号中)分开。target 表中已存在的值不应插入(因此该查询应该插入 5 行):
SQL> insert into target (itn, name)
2 with
3 c2r as
4 -- split column to rows, e.g. "123456 (LED TV)" is an example of such a row
5 (select to_char(trim(regexp_substr(col, '[^;]+', 1, column_value))) val
6 from test join table(cast(multiset(select level from dual
7 connect by level <= regexp_count(col, ';')
8 ) as sys.odcinumberlist)) on 1 = 1
9 ),
10 sep as
11 -- separate ITN (invoice tracking number) and NAME
12 (select substr(val, 1, instr(val, ' ') - 1) itn,
13 substr(val, instr(val, ' ') + 1) name
14 from c2r
15 )
16 select s.itn, replace(replace(s.name, '(', ''), ')', '')
17 from sep s
18 -- don't insert values that already exist in the TARGET table
19 where not exists (select null from target t
20 where t.itn = s.itn
21 );
5 rows created.
SQL>
最终结果:
SQL> select * From target;
ITN NAME
---------- --------------------
234543 LED light
123456 LED TV
654876 LED monitor
665988 Notebook
987654 Mouse
445577 Dead Pixel
6 rows selected.
SQL>
我尝试使用 DBMS_LOB 包来完成这个任务:先把 CLOB 转换为以 ";" 分隔的各个字符串,然后对它们进行一些字符串操作以得到结果。
请尝试以下语句:
-- Splits every ';'-terminated entry of INVOICES.INVOICE (a CLOB such as
-- '123456 (LED TV); 234543 (LED light);') into an invoice number and a name,
-- and inserts the pairs that are not yet present in INVOICE_CATEGORIZED.
--
-- Changes compared with the earlier version of this statement:
--   * explicit target column list, so the statement does not depend on the
--     column order/count of INVOICE_CATEGORIZED;
--   * the invoice number is the text before '(' instead of a fixed 6 characters;
--   * NULL entries (empty CLOB, CLOB without ';', or empty segments such as
--     ';;') are skipped instead of being inserted as (NULL, NULL) rows.
--
-- NOTE(review): text after the last ';' is still ignored; entries are expected
-- to be ';'-terminated. An entry without a '(name)' part yields a NULL name,
-- which never matches in the NOT EXISTS check and is therefore inserted on
-- every run -- confirm whether such entries can occur.
INSERT INTO INVOICE_CATEGORIZED (INVOICE_NUMBER, INVOICE_NAME)
WITH
ENTRIES AS (
    -- One row per ';'-terminated entry, returned as a trimmed string.
    SELECT
        TRIM(CASE WHEN INVOICE_SINGLE.COLUMN_VALUE = 1 THEN
                 -- First entry: everything before the first ';'.
                 dbms_lob.SUBSTR(INVOICE,
                                 dbms_lob.INSTR(INVOICE, ';', 1, INVOICE_SINGLE.COLUMN_VALUE) - 1,
                                 1)
             ELSE
                 -- N-th entry: the text between the (N-1)-th and the N-th ';'.
                 dbms_lob.SUBSTR(INVOICE,
                                 dbms_lob.INSTR(INVOICE, ';', 1, INVOICE_SINGLE.COLUMN_VALUE) - 1
                                 - dbms_lob.INSTR(INVOICE, ';', 1, INVOICE_SINGLE.COLUMN_VALUE - 1),
                                 dbms_lob.INSTR(INVOICE, ';', 1, INVOICE_SINGLE.COLUMN_VALUE - 1) + 1)
             END) AS INVOICE_INN
    FROM
        INVOICES T,
        -- Left-correlated row generator: numbers 1..N while an N-th ';' exists
        -- in this row's INVOICE. CONNECT BY always returns LEVEL 1, even when
        -- there is no ';' at all; that case produces a NULL entry which is
        -- filtered out below.
        TABLE(CAST(MULTISET(
            SELECT LEVEL
            FROM DUAL
            CONNECT BY dbms_lob.INSTR(INVOICE, ';', 1, LEVEL) <> 0
        ) AS SYS.ODCINUMBERLIST)) INVOICE_SINGLE
),
PARSED AS (
    -- INVOICE_INN is a plain string here, so ordinary string functions apply.
    SELECT
        -- Number = text before '(' (or the whole entry when there is no '(').
        TRIM(CASE WHEN INSTR(INVOICE_INN, '(') > 0
                  THEN SUBSTR(INVOICE_INN, 1, INSTR(INVOICE_INN, '(') - 1)
                  ELSE INVOICE_INN
             END) AS INVOICE_NUMBER,
        -- Name = text between the first '(' and the first ')'.
        SUBSTR(INVOICE_INN,
               INSTR(INVOICE_INN, '(') + 1,
               INSTR(INVOICE_INN, ')') - INSTR(INVOICE_INN, '(') - 1) AS INVOICE_NAME
    FROM ENTRIES
    WHERE INVOICE_INN IS NOT NULL
)
SELECT
    TAB.INVOICE_NUMBER,
    TAB.INVOICE_NAME
FROM PARSED TAB
WHERE TAB.INVOICE_NUMBER IS NOT NULL
  -- Insert only the pairs that are not stored yet.
  AND NOT EXISTS (SELECT 1
                  FROM INVOICE_CATEGORIZED IC
                  WHERE IC.INVOICE_NUMBER = TAB.INVOICE_NUMBER
                    AND IC.INVOICE_NAME = TAB.INVOICE_NAME)
干杯!!