Java: 操作保存在 Clob/Blob 中的 docx 文档作为文本
Java: Manipulate docx document saved in Clob/Blob as text
我在 Oracle Clob 或 MySQL Blob 中保存了一个 Word 文档,并编写了以下代码:从数据库中读取 --> 保存为 .docx 文件 --> 操作 docx 文档中的文本。我的问题是:有没有办法在不把数据写入 docx 文件的情况下,直接操作 docx 文档中的文本?
谢谢:)
// JDBC connection settings for the local MySQL "test" database.
// They are never reassigned, so they are declared final.
// NOTE(review): credentials are hard-coded here; consider loading them from configuration.
private static final String url = "jdbc:mysql://localhost/test";
private static final String username = "root";
private static final String password = "root";
public static void main( String[] args) throws ClassNotFoundException, SQLException, IOException
{
Connection conn = null;
Class.forName("com.mysql.jdbc.Driver");
conn = DriverManager.getConnection(url, username, password);
String sql = "SELECT name, description, data FROM documents ";
PreparedStatement stmt = conn.prepareStatement(sql);
ResultSet resultSet = stmt.executeQuery();
while (resultSet.next()) {
String name = resultSet.getString(1);
System.out.println("Name = " + name);
String description = resultSet.getString(2);
System.out.println("Description = " + description);
//
// Get the character stream of our CLOB data
//
Blob blob = resultSet.getBlob(3);
// System.out.println(convertLOB(blob));//convertLOB(blob).toString());
OutputStream fwriter = new FileOutputStream("C:\The Appfuce Primer.docx");
readFromBlob(blob,fwriter);
String target = "C:\The Appfuce Primer.docx";
File document = new File(target);
Parser parser = new AutoDetectParser();
ContentHandler handler = new BodyContentHandler();
Metadata metadata = new Metadata();
try {
parser.parse(new FileInputStream(document), handler, metadata, new ParseContext());
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} catch (SAXException e) {
e.printStackTrace();
} catch (TikaException e) {
e.printStackTrace();
}
System.out.println(metadata);
System.out.println(handler.toString());
}
}
/** Size in bytes of the buffer used when copying BLOB content. */
final static int bBufLen = 4 * 8192;

/**
 * Copies the whole content of a BLOB to an output stream.
 *
 * <p>The BLOB's input stream is closed when the method exits, including when
 * the copy fails part-way. {@code out} is not closed here; the caller owns it.
 *
 * @param blob the BLOB to read from
 * @param out the stream that receives the bytes
 * @return the number of bytes copied
 * @throws SQLException if the BLOB's binary stream cannot be obtained
 * @throws IOException if reading the BLOB or writing to {@code out} fails
 */
public static long readFromBlob(Blob blob, OutputStream out)
        throws SQLException, IOException {
    try (InputStream in = blob.getBinaryStream()) {
        byte[] buf = new byte[bBufLen];
        long read = 0;
        int length;
        while ((length = in.read(buf)) != -1) {
            out.write(buf, 0, length);
            read += length;
        }
        return read;
    }
}
也许您可以在调用 parser.parse 时直接使用 blob.getBinaryStream():
...
// Pass the Blob's binary stream straight to the parser instead of a FileInputStream.
parser.parse(blob.getBinaryStream(), handler, metadata, new ParseContext());
...
因此您不必创建包含 docx 文档的临时文件。
您可以使用 Apache POI 项目来访问您的 .docx 文档的内容。
我在 Oracle Clob 或 MySQL Blob 中保存了一个 Word 文档,并编写了以下代码:从数据库中读取 --> 保存为 .docx 文件 --> 操作 docx 文档中的文本。我的问题是:有没有办法在不把数据写入 docx 文件的情况下,直接操作 docx 文档中的文本?谢谢:)
// JDBC connection settings for the local MySQL "test" database.
// They are never reassigned, so they are declared final.
// NOTE(review): credentials are hard-coded here; consider loading them from configuration.
private static final String url = "jdbc:mysql://localhost/test";
private static final String username = "root";
private static final String password = "root";
public static void main( String[] args) throws ClassNotFoundException, SQLException, IOException
{
Connection conn = null;
Class.forName("com.mysql.jdbc.Driver");
conn = DriverManager.getConnection(url, username, password);
String sql = "SELECT name, description, data FROM documents ";
PreparedStatement stmt = conn.prepareStatement(sql);
ResultSet resultSet = stmt.executeQuery();
while (resultSet.next()) {
String name = resultSet.getString(1);
System.out.println("Name = " + name);
String description = resultSet.getString(2);
System.out.println("Description = " + description);
//
// Get the character stream of our CLOB data
//
Blob blob = resultSet.getBlob(3);
// System.out.println(convertLOB(blob));//convertLOB(blob).toString());
OutputStream fwriter = new FileOutputStream("C:\The Appfuce Primer.docx");
readFromBlob(blob,fwriter);
String target = "C:\The Appfuce Primer.docx";
File document = new File(target);
Parser parser = new AutoDetectParser();
ContentHandler handler = new BodyContentHandler();
Metadata metadata = new Metadata();
try {
parser.parse(new FileInputStream(document), handler, metadata, new ParseContext());
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} catch (SAXException e) {
e.printStackTrace();
} catch (TikaException e) {
e.printStackTrace();
}
System.out.println(metadata);
System.out.println(handler.toString());
}
}
/** Size in bytes of the buffer used when copying BLOB content. */
final static int bBufLen = 4 * 8192;

/**
 * Copies the whole content of a BLOB to an output stream.
 *
 * <p>The BLOB's input stream is closed when the method exits, including when
 * the copy fails part-way. {@code out} is not closed here; the caller owns it.
 *
 * @param blob the BLOB to read from
 * @param out the stream that receives the bytes
 * @return the number of bytes copied
 * @throws SQLException if the BLOB's binary stream cannot be obtained
 * @throws IOException if reading the BLOB or writing to {@code out} fails
 */
public static long readFromBlob(Blob blob, OutputStream out)
        throws SQLException, IOException {
    try (InputStream in = blob.getBinaryStream()) {
        byte[] buf = new byte[bBufLen];
        long read = 0;
        int length;
        while ((length = in.read(buf)) != -1) {
            out.write(buf, 0, length);
            read += length;
        }
        return read;
    }
}
也许您可以在调用 parser.parse 时直接使用 blob.getBinaryStream():
...
// Pass the Blob's binary stream straight to the parser instead of a FileInputStream.
parser.parse(blob.getBinaryStream(), handler, metadata, new ParseContext());
...
因此您不必创建包含 docx 文档的临时文件。
您可以使用 Apache POI 项目来访问您的 .docx 文档的内容。