Is Teradata CLOB batch processing useless with JDBC?
I think I know the answer to this question, but I also want to confirm it with the experts here. I believe the answer is: "Yes, because the batch size limit is 16, which is too little. So practically speaking batch processing is useless with Teradata CLOB."
Here is my reasoning. The following is working Java code; it copies a table from one database connection to another using streams:
import java.io.IOException;
import java.io.Reader;
import java.sql.Clob;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

public class TestClob {

    public void test() throws ClassNotFoundException, SQLException, IOException {
        Connection conn1, conn2;
        conn1 = DriverManager.getConnection(..., user, pass);
        conn2 = DriverManager.getConnection(..., user, pass);

        Statement select = conn1.createStatement();
        ResultSet rs = select.executeQuery("SELECT TOP 100 myClob FROM myTab");

        int totalRowNumber = 0;
        PreparedStatement ps = null;
        Clob clob = null;
        Reader clobReader = null;

        while (rs.next()) {
            totalRowNumber++;
            System.out.println(totalRowNumber);
            clob = rs.getClob(1);
            clobReader = clob.getCharacterStream();
            ps = conn2.prepareStatement("INSERT INTO myTab2 (myClob2) VALUES (?)");
            ps.setCharacterStream(1, clobReader, clob.length());
            ps.execute();       // HERE I just execute the current row
            clob.free();        // FREE the CLOB and READER objects
            clobReader.close();
        }

        conn2.commit();
        ps.close();
        select.close();
        rs.close();
    }
}
According to Teradata's rules, I cannot have more than 16 LOB-related objects open at the same time, so I have to make sure the Clob clob and the Reader clobReader are freed and closed, respectively.
So I have two options:

1) call executeBatch(), with at most 16 Clob clob and Reader clobReader objects at a time (a sketch of this chunked approach follows below); or

2) call execute() and close the Clob clob and Reader clobReader objects immediately afterwards.
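For illustration, here is a minimal, untested sketch of what I mean by option 1, assuming the driver enforces the cap of 16 open LOB objects; the table and column names are reused from my example above, and copyInChunks/flush are made-up helpers:

import java.io.IOException;
import java.io.Reader;
import java.sql.Clob;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;

public class ChunkedClobBatch {

    // Option 1: flush the batch every 16 rows so that no more than
    // 16 Clob/Reader pairs are ever open at the same time.
    static void copyInChunks(ResultSet rs, Connection target) throws SQLException, IOException {
        PreparedStatement ps = target.prepareStatement("INSERT INTO myTab2 (myClob2) VALUES (?)");
        List<Clob> clobs = new ArrayList<Clob>();
        List<Reader> readers = new ArrayList<Reader>();
        while (rs.next()) {
            Clob clob = rs.getClob(1);
            Reader r = clob.getCharacterStream();
            ps.setCharacterStream(1, r, clob.length());
            ps.addBatch();
            clobs.add(clob);
            readers.add(r);
            if (clobs.size() == 16) {          // Teradata's cap on open LOB objects
                flush(ps, clobs, readers);
            }
        }
        if (!clobs.isEmpty()) {                // final partial chunk
            flush(ps, clobs, readers);
        }
        ps.close();
    }

    private static void flush(PreparedStatement ps, List<Clob> clobs, List<Reader> readers)
            throws SQLException, IOException {
        ps.executeBatch();
        for (Clob c : clobs) c.free();         // release the locators
        for (Reader r : readers) r.close();    // before starting the next chunk
        clobs.clear();
        readers.clear();
    }
}

The batch can never grow beyond 16 rows this way, which is exactly why I consider option 1 hardly better than plain execute().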
Conclusion: Teradata CLOB batch inserts are useless with JDBC, because you cannot set a batch size above 16 when inserting Clobs.
Please help me and tell me whether I understand this correctly; I don't see any other way.
You can find below an example of batch inserting more than 16 Clobs.
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.security.GeneralSecurityException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.List;

public class ClobBatch {

    public static void main(String[] args) throws GeneralSecurityException, IOException, SQLException {
        String databaseCredentials = ExternalData.getCredentials();
        Connection c1 = DriverManager.getConnection(databaseCredentials);
        Connection c2 = DriverManager.getConnection(databaseCredentials);

        String sql = "create volatile table clob_test_input ( id bigint, longobj clob) no primary index on commit preserve rows;";
        Statement s = c1.createStatement();
        s.execute(sql);

        String sql2 = "create volatile table clob_test_target ( id bigint, longobj clob) no primary index on commit preserve rows;";
        Statement s2 = c2.createStatement();
        s2.execute(sql2);

        System.out.println("Inserting test data");
        PreparedStatement ps = c1.prepareStatement("insert into clob_test_input (id, longobj) values (?,?);");
        for (int i = 0; i < 1000; i++) {
            String st = randomLargeString();
            ps.setInt(1, i);
            ps.setCharacterStream(2, new BufferedReader(new StringReader(st)), st.length());
            ps.addBatch();
        }
        ps.executeBatch();

        System.out.println("reading test data from input table");
        Statement select = c1.createStatement();
        ResultSet rs = select.executeQuery("select * from clob_test_input");

        PreparedStatement ps2 = c2.prepareStatement("insert into clob_test_target (id, longobj) values (?,?);");
        List<Reader> readerToClose = new ArrayList<Reader>();
        System.out.println("start batch creation");
        while (rs.next()) {
            int pos = rs.getInt("id");
            // Drain the CLOB into an in-memory buffer so the LOB reader can be
            // closed right away; only one LOB object is ever open at a time.
            Reader rdr = new BufferedReader(rs.getCharacterStream("longobj"));
            StringBuffer buffer = new StringBuffer();
            int c = 0;
            while ((c = rdr.read()) != -1) {
                buffer.append((char) c);
            }
            rdr.close();
            ps2.setInt(1, pos);
            // A plain StringReader is not a LOB locator, so the limit of 16
            // does not apply to the rows accumulated in the batch.
            Reader strReader = new StringReader(buffer.toString());
            ps2.setCharacterStream(2, strReader, buffer.length());
            readerToClose.add(strReader);
            ps2.addBatch();
        }
        System.out.println("start batch execution");
        ps2.executeBatch();
        rs.close();
        c1.commit();
        c2.commit();
        for (Reader r : readerToClose) r.close();

        // Read everything back to verify the copy (first 80 chars per row).
        Statement selectTest = c2.createStatement();
        ResultSet rsTest = selectTest.executeQuery("select * from clob_test_target");
        System.out.println("show results");
        while (rsTest.next()) {
            BufferedReader is = new BufferedReader(rsTest.getCharacterStream("longobj"));
            StringBuilder sb = new StringBuilder();
            int c = 0;
            while ((c = is.read()) != -1) {
                sb.append((char) c);
            }
            is.close();
            System.out.println("" + rsTest.getInt("id") + ' ' + sb.toString().substring(0, 80));
        }
        rsTest.close();
    }

    private static String randomLargeString() {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < 10000; i++) {
            sb.append((char) (64 + Math.random() * 20));
        }
        return sb.toString();
    }
}
I have made some optimistic assumptions (e.g. Clobs of 10,000 characters), but the approach could be made less memory-hungry by using temporary files instead of StringBuffers.

The approach is basically to find some "buffer" (whether in memory or in a temp file) in which to hold the data from the source database, so that the input Clob readers can be closed. Then you can batch-insert the data from that buffer, where the limit of 16 no longer applies (though you are still bound by memory).
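As a rough, untested sketch of that temp-file variant (the helpers spill and copyViaTempFiles are made up for illustration; the id/longobj columns are reused from the example above), each CLOB is drained to a temp file, its LOB reader is closed immediately, and the batch is fed from plain file readers:

import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;

public class ClobSpill {

    // Drain a LOB reader into a temp file and return the number of characters
    // written; the LOB reader is closed here, so at most one is open at a time.
    static long spill(Reader clobReader, Path tmp) throws IOException {
        long chars = 0;
        try (Reader in = clobReader;
             Writer out = Files.newBufferedWriter(tmp, StandardCharsets.UTF_8)) {
            char[] buf = new char[8192];
            for (int n; (n = in.read(buf)) != -1; ) {
                out.write(buf, 0, n);
                chars += n;
            }
        }
        return chars;
    }

    static void copyViaTempFiles(ResultSet rs, PreparedStatement ps) throws IOException, SQLException {
        List<Reader> toClose = new ArrayList<Reader>();
        List<Path> toDelete = new ArrayList<Path>();
        while (rs.next()) {
            Path tmp = Files.createTempFile("clob", ".txt");
            long length = spill(rs.getCharacterStream("longobj"), tmp);
            ps.setInt(1, rs.getInt("id"));
            // A file-backed reader is not a LOB locator, so the limit of 16
            // does not constrain how many rows the batch accumulates.
            Reader fileReader = Files.newBufferedReader(tmp, StandardCharsets.UTF_8);
            ps.setCharacterStream(2, fileReader, length);
            ps.addBatch();
            toClose.add(fileReader);
            toDelete.add(tmp);
        }
        ps.executeBatch();
        for (Reader r : toClose) r.close();
        for (Path p : toDelete) Files.delete(p);
    }
}

Unlike the in-memory version above, the heap footprint stays bounded by the 8 KB copy buffer, at the cost of extra disk I/O per row.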