修改 CLOB 以删除重复项
modify CLOB to remove duplicates
我有一个 table,其中包含一个 CLOB。基于 CLOB 我调用了一个函数来创建一个哈希键。
由于一些 CLOB 是重复的,这又生成了重复的哈希键,导致我无法在 hash_val 列上创建 PRIMARY KEY。
下面是重复的分组。
我想做的是根据组修改列表中的每个 CLOB,方法是在每个 CLOB 的底部附加一个时间戳,这样就不会再有重复,然后我可以添加 PRIMARY KEY。
我希望有人可以通过生成循环并附加时间戳来帮助我,我将在 table 上放置一个 INSERT/update 触发器以生成未来的哈希值。
-- Demo table: each row stores a CLOB together with the hash key that,
-- per the description above, is derived from that CLOB.
CREATE TABLE table_z (
    -- identity-generated row number, starting at 1
    seq_num   INTEGER GENERATED BY DEFAULT AS IDENTITY (START WITH 1) NOT NULL,
    val       NUMBER,
    -- hash key for clob_val; intended to become the PRIMARY KEY
    hash_val  VARCHAR2(1000),
    clob_val  CLOB
);
-- Sample rows. seq_num is omitted from the column list, so the identity
-- column assigns it; the order of these statements therefore decides which
-- seq_num each row receives.
INSERT INTO table_z (val, hash_val, clob_val)
VALUES (1, '4714870AFF6C97CA09D135834FDB58A6389A50C11FEF8EC4AFEF466FB60A23AC6B7A9C92658F14DF4993D6B40A4E4D8424196AFC347E97640D68DE61E1CF14B0', 'aaaaaaaaaa');

INSERT INTO table_z (val, hash_val, clob_val)
VALUES (1, 'F368A29B71BD201A7EF78B5DF88B1361FBE83F959756D33793837A5D7B2EAF660F2F6C7E2FBACE01965683C4CFAFDED3FF28AAB34E329AA79BC81E7703F68B86', 'aaaaa');

INSERT INTO table_z (val, hash_val, clob_val)
VALUES (2, '517C1CDB694A83ABF80A1D91EE91059B6443769DBEDDF3F5CC583CCCCC1CCDFE9E5330C61830D9E25AF03536909E8272F056C8FF1FBC9AABD3492C291A735B58', 'Xaaaaaaaaa');

INSERT INTO table_z (val, hash_val, clob_val)
VALUES (2, 'D597AD764E82E38DED6184527197C5CA39743F805F1D2355A89E62ECA275D62CD545DDFA57A36B37C711527A63717A69586CBE78AD056A92A0C6479391FC2349', 'xxxx');

INSERT INTO table_z (val, hash_val, clob_val)
VALUES (3, '9B71D224BD62F3785D96D46AD3EA3D73319BFBC2890CAADAE2DFF72519673CA72323C3D99BA5C11D7C7ACC6E14B8C5DA0C4663475C2E5C3ADEF46F73BCDEC043', 'hello');

-- The next two rows repeat the CLOB and hash of the first row.
INSERT INTO table_z (val, hash_val, clob_val)
VALUES (1, '4714870AFF6C97CA09D135834FDB58A6389A50C11FEF8EC4AFEF466FB60A23AC6B7A9C92658F14DF4993D6B40A4E4D8424196AFC347E97640D68DE61E1CF14B0', 'aaaaaaaaaa');

INSERT INTO table_z (val, hash_val, clob_val)
VALUES (2, '4714870AFF6C97CA09D135834FDB58A6389A50C11FEF8EC4AFEF466FB60A23AC6B7A9C92658F14DF4993D6B40A4E4D8424196AFC347E97640D68DE61E1CF14B0', 'aaaaaaaaaa');

-- A second duplicated pair.
INSERT INTO table_z (val, hash_val, clob_val)
VALUES (2, '6522DA2F3FE4F163D52ACEF62440C086BE5EC1203C2CE90A5427546A1CAFE6440618FD3AF2C8A3362AB7BC7544600CA77BED41F95D8038A8A7CC458177691474', 'oracle');

INSERT INTO table_z (val, hash_val, clob_val)
VALUES (3, '6522DA2F3FE4F163D52ACEF62440C086BE5EC1203C2CE90A5427546A1CAFE6440618FD3AF2C8A3362AB7BC7544600CA77BED41F95D8038A8A7CC458177691474', 'oracle');
-- Report every hash_val that occurs more than once, with the comma-separated
-- seq_num values of the rows sharing it and the number of rows in the group.
SELECT
    LISTAGG(seq_num, ',') WITHIN GROUP (ORDER BY seq_num) seq_num,
    hash_val,
    COUNT(hash_val)
FROM table_z
GROUP BY hash_val
HAVING COUNT(hash_val) > 1;
SEQ_NUM HASH_VAL COUNT(HASH_VAL)
1,6,7 4714870AFF6C97CA09D135834FDB58A6389A50C11FEF8EC4AFEF466FB60A23AC6B7A9C92658F14DF4993D6B40A4E4D8424196AFC347E97640D68DE61E1CF14B0 3
8,9 6522DA2F3FE4F163D52ACEF62440C086BE5EC1203C2CE90A5427546A1CAFE6440618FD3AF2C8A3362AB7BC7544600CA77BED41F95D8038A8A7CC458177691474 2
你其实不需要循环,用一条 UPDATE 语句就可以完成。常见的做法是:在每组重复项中选出一条不想更改的记录(通常是该组中 seq_num 最小或最大的那条)作为基准,然后修改所有大于(或小于)该基准的其他重复记录。
-- Make duplicated CLOBs unique by appending a per-row date/time string.
-- The row with the lowest seq_num in each hash_val group is left untouched;
-- every later duplicate in the group gets the suffix.
-- SYSTIMESTAMP + number yields a DATE and the number counts days, so
-- seq_num / 10000 shifts each row by seq_num * 8.64 seconds.
-- The explicit TO_CHAR mask matters: an implicit DATE-to-string conversion
-- uses the session's NLS_DATE_FORMAT, and a mask without a time component
-- (such as the default DD-MON-RR) would give every duplicate the same suffix.
-- hash_val is not modified by this statement; it must be regenerated from the
-- new clob_val before the PRIMARY KEY can be added.
UPDATE table_z
SET clob_val = clob_val
    || TO_CHAR(SYSTIMESTAMP + seq_num / 10000, 'YYYY-MM-DD HH24:MI:SS')
WHERE EXISTS -- only update if there's a duplicate with a lower seq_num
    (SELECT 1
     FROM table_z min_z
     WHERE min_z.hash_val = table_z.hash_val
       AND min_z.seq_num < table_z.seq_num);
我在时间戳上加了 seq_num/10000(以天为单位,相当于每个 seq_num 约 8.64 秒),这样每一行的时间都会错开几秒,同一组中就不会有两个 CLOB 得到相同的时间戳。
我有一个 table,其中包含一个 CLOB。基于 CLOB 我调用了一个函数来创建一个哈希键。
由于一些 CLOB 是重复的,这又生成了重复的哈希键,导致我无法在 hash_val 列上创建 PRIMARY KEY。
下面是重复的分组。
我想做的是根据组修改列表中的每个 CLOB,方法是在每个 CLOB 的底部附加一个时间戳,这样就不会再有重复,然后我可以添加 PRIMARY KEY。
我希望有人可以通过生成循环并附加时间戳来帮助我,我将在 table 上放置一个 INSERT/update 触发器以生成未来的哈希值。
-- Demo table: each row stores a CLOB together with the hash key that,
-- per the description above, is derived from that CLOB.
CREATE TABLE table_z (
    -- identity-generated row number, starting at 1
    seq_num   INTEGER GENERATED BY DEFAULT AS IDENTITY (START WITH 1) NOT NULL,
    val       NUMBER,
    -- hash key for clob_val; intended to become the PRIMARY KEY
    hash_val  VARCHAR2(1000),
    clob_val  CLOB
);
-- Sample rows. seq_num is omitted from the column list, so the identity
-- column assigns it; the order of these statements therefore decides which
-- seq_num each row receives.
INSERT INTO table_z (val, hash_val, clob_val)
VALUES (1, '4714870AFF6C97CA09D135834FDB58A6389A50C11FEF8EC4AFEF466FB60A23AC6B7A9C92658F14DF4993D6B40A4E4D8424196AFC347E97640D68DE61E1CF14B0', 'aaaaaaaaaa');

INSERT INTO table_z (val, hash_val, clob_val)
VALUES (1, 'F368A29B71BD201A7EF78B5DF88B1361FBE83F959756D33793837A5D7B2EAF660F2F6C7E2FBACE01965683C4CFAFDED3FF28AAB34E329AA79BC81E7703F68B86', 'aaaaa');

INSERT INTO table_z (val, hash_val, clob_val)
VALUES (2, '517C1CDB694A83ABF80A1D91EE91059B6443769DBEDDF3F5CC583CCCCC1CCDFE9E5330C61830D9E25AF03536909E8272F056C8FF1FBC9AABD3492C291A735B58', 'Xaaaaaaaaa');

INSERT INTO table_z (val, hash_val, clob_val)
VALUES (2, 'D597AD764E82E38DED6184527197C5CA39743F805F1D2355A89E62ECA275D62CD545DDFA57A36B37C711527A63717A69586CBE78AD056A92A0C6479391FC2349', 'xxxx');

INSERT INTO table_z (val, hash_val, clob_val)
VALUES (3, '9B71D224BD62F3785D96D46AD3EA3D73319BFBC2890CAADAE2DFF72519673CA72323C3D99BA5C11D7C7ACC6E14B8C5DA0C4663475C2E5C3ADEF46F73BCDEC043', 'hello');

-- The next two rows repeat the CLOB and hash of the first row.
INSERT INTO table_z (val, hash_val, clob_val)
VALUES (1, '4714870AFF6C97CA09D135834FDB58A6389A50C11FEF8EC4AFEF466FB60A23AC6B7A9C92658F14DF4993D6B40A4E4D8424196AFC347E97640D68DE61E1CF14B0', 'aaaaaaaaaa');

INSERT INTO table_z (val, hash_val, clob_val)
VALUES (2, '4714870AFF6C97CA09D135834FDB58A6389A50C11FEF8EC4AFEF466FB60A23AC6B7A9C92658F14DF4993D6B40A4E4D8424196AFC347E97640D68DE61E1CF14B0', 'aaaaaaaaaa');

-- A second duplicated pair.
INSERT INTO table_z (val, hash_val, clob_val)
VALUES (2, '6522DA2F3FE4F163D52ACEF62440C086BE5EC1203C2CE90A5427546A1CAFE6440618FD3AF2C8A3362AB7BC7544600CA77BED41F95D8038A8A7CC458177691474', 'oracle');

INSERT INTO table_z (val, hash_val, clob_val)
VALUES (3, '6522DA2F3FE4F163D52ACEF62440C086BE5EC1203C2CE90A5427546A1CAFE6440618FD3AF2C8A3362AB7BC7544600CA77BED41F95D8038A8A7CC458177691474', 'oracle');
-- Report every hash_val that occurs more than once, with the comma-separated
-- seq_num values of the rows sharing it and the number of rows in the group.
SELECT
    LISTAGG(seq_num, ',') WITHIN GROUP (ORDER BY seq_num) seq_num,
    hash_val,
    COUNT(hash_val)
FROM table_z
GROUP BY hash_val
HAVING COUNT(hash_val) > 1;
SEQ_NUM HASH_VAL COUNT(HASH_VAL)
1,6,7 4714870AFF6C97CA09D135834FDB58A6389A50C11FEF8EC4AFEF466FB60A23AC6B7A9C92658F14DF4993D6B40A4E4D8424196AFC347E97640D68DE61E1CF14B0 3
8,9 6522DA2F3FE4F163D52ACEF62440C086BE5EC1203C2CE90A5427546A1CAFE6440618FD3AF2C8A3362AB7BC7544600CA77BED41F95D8038A8A7CC458177691474 2
你其实不需要循环,用一条 UPDATE 语句就可以完成。常见的做法是:在每组重复项中选出一条不想更改的记录(通常是该组中 seq_num 最小或最大的那条)作为基准,然后修改所有大于(或小于)该基准的其他重复记录。
-- Make duplicated CLOBs unique by appending a per-row date/time string.
-- The row with the lowest seq_num in each hash_val group is left untouched;
-- every later duplicate in the group gets the suffix.
-- SYSTIMESTAMP + number yields a DATE and the number counts days, so
-- seq_num / 10000 shifts each row by seq_num * 8.64 seconds.
-- The explicit TO_CHAR mask matters: an implicit DATE-to-string conversion
-- uses the session's NLS_DATE_FORMAT, and a mask without a time component
-- (such as the default DD-MON-RR) would give every duplicate the same suffix.
-- hash_val is not modified by this statement; it must be regenerated from the
-- new clob_val before the PRIMARY KEY can be added.
UPDATE table_z
SET clob_val = clob_val
    || TO_CHAR(SYSTIMESTAMP + seq_num / 10000, 'YYYY-MM-DD HH24:MI:SS')
WHERE EXISTS -- only update if there's a duplicate with a lower seq_num
    (SELECT 1
     FROM table_z min_z
     WHERE min_z.hash_val = table_z.hash_val
       AND min_z.seq_num < table_z.seq_num);
我在时间戳上加了 seq_num/10000(以天为单位,相当于每个 seq_num 约 8.64 秒),这样每一行的时间都会错开几秒,同一组中就不会有两个 CLOB 得到相同的时间戳。