是否可以使用 Apache POI 在 Word 文档的特定位置插入 HTML
Is it possible to insert HTML at a specific position in a Word document using Apache POI
我正在尝试使用 Apache POI 在 ms Word 文档的特定位置插入 HTML 文本。
我一直在按照我找到的、由 Yaun 提供的说明进行操作。这是一个很好的例子,但只展示了如何在文档最后添加 HTML。是因为无法将其插入其他地方,还是有人知道如何插入,并有时间向我解释或指出正确的方向?
提前致谢!
弗雷德里克
要将某些内容插入 XWPFDocument 的正文,需要一个指向插入位置的 XmlCursor。方法 XWPFDocument.insertNewParagraph(org.apache.xmlbeans.XmlCursor cursor) 和 XWPFDocument.insertNewTbl(org.apache.xmlbeans.XmlCursor cursor) 中已经完成了同样的工作。
所以我们需要一个方法 insertAltChunk(XWPFDocument document, MyXWPFHtmlDocument myXWPFHtmlDocument, XmlCursor cursor),在 document 的 cursor 位置插入一个 altChunk,该 altChunk 指向 myXWPFHtmlDocument 的 Id。
...
/**
 * Reports whether the parent of the cursor's current position is the body of the given document.
 *
 * <p>The check moves a temporary copy of the cursor, which is disposed before returning;
 * the cursor passed in is not moved.
 *
 * @param document the Word document whose body is compared against
 * @param cursor   the cursor whose position is checked
 * @return {@code true} if the object one level above the cursor is the document body
 */
boolean isCursorInBody(XWPFDocument document, XmlCursor cursor) {
    XmlCursor probe = cursor.newCursor();
    probe.toParent();
    Object parent = probe.getObject();
    Object body = document.getDocument().getBody();
    probe.dispose();
    // Identity comparison on purpose: the parent must be the very same body object.
    return parent == body;
}
/**
 * Inserts an {@code altChunk} element at the cursor position and gives it an {@code id}
 * attribute holding the Id of the given HTML part.
 *
 * <p>Nothing happens (and the cursor is left untouched) when the cursor is not located
 * directly in the document body. After a successful insertion the cursor is disposed,
 * so the caller must not use it again.
 *
 * @param document           the document receiving the {@code altChunk}
 * @param myXWPFHtmlDocument the HTML part whose Id the {@code altChunk} refers to
 * @param cursor             the insertion position
 */
void insertAltChunk(XWPFDocument document, MyXWPFHtmlDocument myXWPFHtmlDocument, XmlCursor cursor) {
    if (!isCursorInBody(document, cursor)) {
        return;
    }
    final String wordprocessingNs = "http://schemas.openxmlformats.org/wordprocessingml/2006/main";
    final String relationshipsNs = "http://schemas.openxmlformats.org/officeDocument/2006/relationships";
    cursor.beginElement(new QName(wordprocessingNs, "altChunk"));
    cursor.insertAttributeWithValue(new QName(relationshipsNs, "id"), myXWPFHtmlDocument.getId());
    cursor.dispose();
}
...
完整示例,基于我之前的代码:它在现有 Word 文档的第二段和第五段之前插入 altChunk。
import java.io.*;
import org.apache.poi.*;
import org.apache.poi.ooxml.*;
import org.apache.poi.openxml4j.opc.*;
import org.apache.poi.xwpf.usermodel.*;
import org.apache.xmlbeans.XmlCursor;
import javax.xml.namespace.QName;
/**
 * Example: inserts HTML content into an existing Word document at chosen body positions.
 *
 * <p>Each HTML fragment is stored as its own part ({@code /word/htmlDoc#.html}) in the
 * *.docx ZIP archive and is referenced from the document body through an {@code altChunk}
 * element placed at an {@link XmlCursor} position.
 */
public class InsertHTMLaltChunkInWordAtCursor {

    /** Qualified name of the {@code altChunk} element. */
    private static final QName ALT_CHUNK = new QName(
            "http://schemas.openxmlformats.org/wordprocessingml/2006/main", "altChunk");

    /** Qualified name of the relationship {@code id} attribute. */
    private static final QName RELATIONSHIP_ID = new QName(
            "http://schemas.openxmlformats.org/officeDocument/2006/relationships", "id");

    /**
     * Creates the part /word/&lt;id&gt;.html in the *.docx ZIP archive and relates it to the document.
     *
     * @param document the document that will own the new part
     * @param id       the part name stem and relation id, e.g. {@code htmlDoc1}
     * @return the wrapper used to set the HTML of the new part
     * @throws Exception if the part name is invalid or the part cannot be created
     */
    private static MyXWPFHtmlDocument createHtmlDoc(XWPFDocument document, String id) throws Exception {
        OPCPackage oPCPackage = document.getPackage();
        PackagePartName partName = PackagingURIHelper.createPartName("/word/" + id + ".html");
        PackagePart part = oPCPackage.createPart(partName, "text/html");
        MyXWPFHtmlDocument myXWPFHtmlDocument = new MyXWPFHtmlDocument(part, id);
        document.addRelation(myXWPFHtmlDocument.getId(), new XWPFHtmlRelation(), myXWPFHtmlDocument);
        return myXWPFHtmlDocument;
    }

    /**
     * Reports whether the parent of the cursor's position is the body of the document.
     * A temporary copy of the cursor is moved; the cursor passed in is not.
     */
    private static boolean isCursorInBody(XWPFDocument document, XmlCursor cursor) {
        XmlCursor verify = cursor.newCursor();
        try {
            verify.toParent();
            return verify.getObject() == document.getDocument().getBody();
        } finally {
            // Release the temporary cursor even if the check throws.
            verify.dispose();
        }
    }

    /**
     * Inserts an {@code altChunk} referring to the given HTML part at the cursor position.
     *
     * <p>This method takes ownership of {@code cursor}: it is always disposed before
     * returning, whether or not anything was inserted. Nothing is inserted when the
     * cursor is not located directly in the document body.
     *
     * @param document           the document receiving the {@code altChunk}
     * @param myXWPFHtmlDocument the HTML part whose Id the {@code altChunk} refers to
     * @param cursor             the insertion position; unusable after this call
     */
    private static void insertAltChunk(XWPFDocument document, MyXWPFHtmlDocument myXWPFHtmlDocument, XmlCursor cursor) {
        try {
            if (isCursorInBody(document, cursor)) {
                cursor.beginElement(ALT_CHUNK);
                cursor.insertAttributeWithValue(RELATIONSHIP_ID, myXWPFHtmlDocument.getId());
            }
        } finally {
            cursor.dispose();
        }
    }

    /**
     * Returns a new cursor positioned at the paragraph with the given index.
     *
     * @throws IllegalArgumentException if the document has no paragraph at that index
     */
    private static XmlCursor cursorAtParagraph(XWPFDocument document, int index) {
        XWPFParagraph paragraph = document.getParagraphArray(index);
        if (paragraph == null) {
            throw new IllegalArgumentException("Document has no paragraph at index " + index);
        }
        return paragraph.getCTP().newCursor();
    }

    /**
     * Reads ./WordDocument.docx, inserts one HTML chunk before the second paragraph and one
     * before the fifth paragraph, and writes the result to InsertHTMLaltChunkInWordAtCursor.docx.
     */
    public static void main(String[] args) throws Exception {
        // try-with-resources closes the input stream and the document even on failure.
        try (InputStream in = new FileInputStream("./WordDocument.docx");
             XWPFDocument document = new XWPFDocument(in)) {

            MyXWPFHtmlDocument myXWPFHtmlDocument;

            myXWPFHtmlDocument = createHtmlDoc(document, "htmlDoc1");
            myXWPFHtmlDocument.setHtml(myXWPFHtmlDocument.getHtml().replace("<body></body>",
                    "<body><p>Simple <b>HTML</b> <i>formatted</i> <u>text</u></p></body>"));
            insertAltChunk(document, myXWPFHtmlDocument, cursorAtParagraph(document, 1));

            myXWPFHtmlDocument = createHtmlDoc(document, "htmlDoc2");
            myXWPFHtmlDocument.setHtml(myXWPFHtmlDocument.getHtml().replace("<body></body>",
                    "<body>" +
                    "<table border=\"1\">" +
                    "<caption>A table</caption>" +
                    "<tr><th>Name</th><th>Date</th><th>Amount</th></tr>" +
                    "<tr><td>John Doe</td><td>2018-12-01</td><td>1,234.56</td></tr>" +
                    "</table>" +
                    "</body>"
            ));
            insertAltChunk(document, myXWPFHtmlDocument, cursorAtParagraph(document, 4));

            try (OutputStream out = new FileOutputStream("InsertHTMLaltChunkInWordAtCursor.docx")) {
                document.write(out);
            }
        }
    }

    /**
     * Wrapper for the part /word/htmlDoc#.html in the *.docx ZIP archive; holds the HTML
     * as a string and writes it into the part on commit.
     */
    //TODO: We should *not* be using String methods for manipulating HTML!
    private static class MyXWPFHtmlDocument extends POIXMLDocumentPart {

        private String html;
        private final String id;

        private MyXWPFHtmlDocument(PackagePart part, String id) throws Exception {
            super(part);
            this.html = "<!DOCTYPE html><html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\"><style></style><title>HTML import</title></head><body></body>";
            this.id = id;
        }

        private String getId() {
            return id;
        }

        private String getHtml() {
            return html;
        }

        private void setHtml(String html) {
            this.html = html;
        }

        /** Writes the current HTML, encoded as UTF-8, into the package part. */
        @Override
        protected void commit() throws IOException {
            PackagePart part = getPackagePart();
            // Both resources are closed even if write() fails.
            try (OutputStream out = part.getOutputStream();
                 Writer writer = new OutputStreamWriter(out, java.nio.charset.StandardCharsets.UTF_8)) {
                writer.write(html);
            }
        }
    }

    /** The relation type for /word/htmlDoc#.html. */
    private static final class XWPFHtmlRelation extends POIXMLRelation {
        private XWPFHtmlRelation() {
            super(
                    "text/html",
                    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/aFChunk",
                    "/word/htmlDoc#.html");
        }
    }
}
另见 。
我正在尝试使用 Apache POI 在 ms Word 文档的特定位置插入 HTML 文本。
我一直在按照我找到的说明进行操作
提前致谢!
弗雷德里克
要将某些内容插入 XWPFDocument 的正文,需要一个指向插入位置的 XmlCursor。方法 XWPFDocument.insertNewParagraph(org.apache.xmlbeans.XmlCursor cursor) 和 XWPFDocument.insertNewTbl(org.apache.xmlbeans.XmlCursor cursor) 中已经完成了同样的工作。
所以我们需要一个方法 insertAltChunk(XWPFDocument document, MyXWPFHtmlDocument myXWPFHtmlDocument, XmlCursor cursor),在 document 的 cursor 位置插入一个 altChunk,该 altChunk 指向 myXWPFHtmlDocument 的 Id。
...
/**
 * Reports whether the parent of the cursor's current position is the body of the given document.
 *
 * <p>The check moves a temporary copy of the cursor, which is disposed before returning;
 * the cursor passed in is not moved.
 *
 * @param document the Word document whose body is compared against
 * @param cursor   the cursor whose position is checked
 * @return {@code true} if the object one level above the cursor is the document body
 */
boolean isCursorInBody(XWPFDocument document, XmlCursor cursor) {
    XmlCursor probe = cursor.newCursor();
    probe.toParent();
    Object parent = probe.getObject();
    Object body = document.getDocument().getBody();
    probe.dispose();
    // Identity comparison on purpose: the parent must be the very same body object.
    return parent == body;
}
/**
 * Inserts an {@code altChunk} element at the cursor position and gives it an {@code id}
 * attribute holding the Id of the given HTML part.
 *
 * <p>Nothing happens (and the cursor is left untouched) when the cursor is not located
 * directly in the document body. After a successful insertion the cursor is disposed,
 * so the caller must not use it again.
 *
 * @param document           the document receiving the {@code altChunk}
 * @param myXWPFHtmlDocument the HTML part whose Id the {@code altChunk} refers to
 * @param cursor             the insertion position
 */
void insertAltChunk(XWPFDocument document, MyXWPFHtmlDocument myXWPFHtmlDocument, XmlCursor cursor) {
    if (!isCursorInBody(document, cursor)) {
        return;
    }
    final String wordprocessingNs = "http://schemas.openxmlformats.org/wordprocessingml/2006/main";
    final String relationshipsNs = "http://schemas.openxmlformats.org/officeDocument/2006/relationships";
    cursor.beginElement(new QName(wordprocessingNs, "altChunk"));
    cursor.insertAttributeWithValue(new QName(relationshipsNs, "id"), myXWPFHtmlDocument.getId());
    cursor.dispose();
}
...
完整示例,基于我之前的代码:它在现有 Word 文档的第二段和第五段之前插入 altChunk。
import java.io.*;
import org.apache.poi.*;
import org.apache.poi.ooxml.*;
import org.apache.poi.openxml4j.opc.*;
import org.apache.poi.xwpf.usermodel.*;
import org.apache.xmlbeans.XmlCursor;
import javax.xml.namespace.QName;
/**
 * Example: inserts HTML content into an existing Word document at chosen body positions.
 *
 * <p>Each HTML fragment is stored as its own part ({@code /word/htmlDoc#.html}) in the
 * *.docx ZIP archive and is referenced from the document body through an {@code altChunk}
 * element placed at an {@link XmlCursor} position.
 */
public class InsertHTMLaltChunkInWordAtCursor {

    /** Qualified name of the {@code altChunk} element. */
    private static final QName ALT_CHUNK = new QName(
            "http://schemas.openxmlformats.org/wordprocessingml/2006/main", "altChunk");

    /** Qualified name of the relationship {@code id} attribute. */
    private static final QName RELATIONSHIP_ID = new QName(
            "http://schemas.openxmlformats.org/officeDocument/2006/relationships", "id");

    /**
     * Creates the part /word/&lt;id&gt;.html in the *.docx ZIP archive and relates it to the document.
     *
     * @param document the document that will own the new part
     * @param id       the part name stem and relation id, e.g. {@code htmlDoc1}
     * @return the wrapper used to set the HTML of the new part
     * @throws Exception if the part name is invalid or the part cannot be created
     */
    private static MyXWPFHtmlDocument createHtmlDoc(XWPFDocument document, String id) throws Exception {
        OPCPackage oPCPackage = document.getPackage();
        PackagePartName partName = PackagingURIHelper.createPartName("/word/" + id + ".html");
        PackagePart part = oPCPackage.createPart(partName, "text/html");
        MyXWPFHtmlDocument myXWPFHtmlDocument = new MyXWPFHtmlDocument(part, id);
        document.addRelation(myXWPFHtmlDocument.getId(), new XWPFHtmlRelation(), myXWPFHtmlDocument);
        return myXWPFHtmlDocument;
    }

    /**
     * Reports whether the parent of the cursor's position is the body of the document.
     * A temporary copy of the cursor is moved; the cursor passed in is not.
     */
    private static boolean isCursorInBody(XWPFDocument document, XmlCursor cursor) {
        XmlCursor verify = cursor.newCursor();
        try {
            verify.toParent();
            return verify.getObject() == document.getDocument().getBody();
        } finally {
            // Release the temporary cursor even if the check throws.
            verify.dispose();
        }
    }

    /**
     * Inserts an {@code altChunk} referring to the given HTML part at the cursor position.
     *
     * <p>This method takes ownership of {@code cursor}: it is always disposed before
     * returning, whether or not anything was inserted. Nothing is inserted when the
     * cursor is not located directly in the document body.
     *
     * @param document           the document receiving the {@code altChunk}
     * @param myXWPFHtmlDocument the HTML part whose Id the {@code altChunk} refers to
     * @param cursor             the insertion position; unusable after this call
     */
    private static void insertAltChunk(XWPFDocument document, MyXWPFHtmlDocument myXWPFHtmlDocument, XmlCursor cursor) {
        try {
            if (isCursorInBody(document, cursor)) {
                cursor.beginElement(ALT_CHUNK);
                cursor.insertAttributeWithValue(RELATIONSHIP_ID, myXWPFHtmlDocument.getId());
            }
        } finally {
            cursor.dispose();
        }
    }

    /**
     * Returns a new cursor positioned at the paragraph with the given index.
     *
     * @throws IllegalArgumentException if the document has no paragraph at that index
     */
    private static XmlCursor cursorAtParagraph(XWPFDocument document, int index) {
        XWPFParagraph paragraph = document.getParagraphArray(index);
        if (paragraph == null) {
            throw new IllegalArgumentException("Document has no paragraph at index " + index);
        }
        return paragraph.getCTP().newCursor();
    }

    /**
     * Reads ./WordDocument.docx, inserts one HTML chunk before the second paragraph and one
     * before the fifth paragraph, and writes the result to InsertHTMLaltChunkInWordAtCursor.docx.
     */
    public static void main(String[] args) throws Exception {
        // try-with-resources closes the input stream and the document even on failure.
        try (InputStream in = new FileInputStream("./WordDocument.docx");
             XWPFDocument document = new XWPFDocument(in)) {

            MyXWPFHtmlDocument myXWPFHtmlDocument;

            myXWPFHtmlDocument = createHtmlDoc(document, "htmlDoc1");
            myXWPFHtmlDocument.setHtml(myXWPFHtmlDocument.getHtml().replace("<body></body>",
                    "<body><p>Simple <b>HTML</b> <i>formatted</i> <u>text</u></p></body>"));
            insertAltChunk(document, myXWPFHtmlDocument, cursorAtParagraph(document, 1));

            myXWPFHtmlDocument = createHtmlDoc(document, "htmlDoc2");
            myXWPFHtmlDocument.setHtml(myXWPFHtmlDocument.getHtml().replace("<body></body>",
                    "<body>" +
                    "<table border=\"1\">" +
                    "<caption>A table</caption>" +
                    "<tr><th>Name</th><th>Date</th><th>Amount</th></tr>" +
                    "<tr><td>John Doe</td><td>2018-12-01</td><td>1,234.56</td></tr>" +
                    "</table>" +
                    "</body>"
            ));
            insertAltChunk(document, myXWPFHtmlDocument, cursorAtParagraph(document, 4));

            try (OutputStream out = new FileOutputStream("InsertHTMLaltChunkInWordAtCursor.docx")) {
                document.write(out);
            }
        }
    }

    /**
     * Wrapper for the part /word/htmlDoc#.html in the *.docx ZIP archive; holds the HTML
     * as a string and writes it into the part on commit.
     */
    //TODO: We should *not* be using String methods for manipulating HTML!
    private static class MyXWPFHtmlDocument extends POIXMLDocumentPart {

        private String html;
        private final String id;

        private MyXWPFHtmlDocument(PackagePart part, String id) throws Exception {
            super(part);
            this.html = "<!DOCTYPE html><html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\"><style></style><title>HTML import</title></head><body></body>";
            this.id = id;
        }

        private String getId() {
            return id;
        }

        private String getHtml() {
            return html;
        }

        private void setHtml(String html) {
            this.html = html;
        }

        /** Writes the current HTML, encoded as UTF-8, into the package part. */
        @Override
        protected void commit() throws IOException {
            PackagePart part = getPackagePart();
            // Both resources are closed even if write() fails.
            try (OutputStream out = part.getOutputStream();
                 Writer writer = new OutputStreamWriter(out, java.nio.charset.StandardCharsets.UTF_8)) {
                writer.write(html);
            }
        }
    }

    /** The relation type for /word/htmlDoc#.html. */
    private static final class XWPFHtmlRelation extends POIXMLRelation {
        private XWPFHtmlRelation() {
            super(
                    "text/html",
                    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/aFChunk",
                    "/word/htmlDoc#.html");
        }
    }
}
另见