是否可以使用 Apache POI 在 Word 文档的特定位置插入 HTML

Is it possible to insert HTML at a specific position in a Word document using Apache POI?

我正在尝试使用 Apache POI 在 ms Word 文档的特定位置插入 HTML 文本。

我一直在按照我找到的、由 Yaun 提供的说明进行操作。这是一个很好的例子,但只展示了如何在文档最后添加 HTML。是因为无法将其插入其他地方,还是有人知道如何插入,并有时间向我解释或指出正确的方向?

提前致谢!

弗雷德里克

要将某些内容插入 XWPFDocument 的正文,需要一个指向插入位置的 XmlCursor。方法 XWPFDocument.insertNewParagraph(org.apache.xmlbeans.XmlCursor cursor) 和 XWPFDocument.insertNewTbl(org.apache.xmlbeans.XmlCursor cursor) 中已经完成了同样的工作。

所以我们需要一个方法 insertAltChunk(XWPFDocument document, MyXWPFHtmlDocument myXWPFHtmlDocument, XmlCursor cursor),它在 document 中 cursor 所指的位置插入一个 altChunk,该 altChunk 通过 Id 指向 myXWPFHtmlDocument。

...
 /**
  * Reports whether {@code cursor} is positioned directly inside the body of {@code document}.
  *
  * <p>The check is done on a second cursor obtained from {@code cursor.newCursor()}, which is
  * moved to its parent and then released again; {@code cursor} itself is not moved or disposed.
  *
  * @param document the document whose body object is used for the comparison
  * @param cursor   the cursor whose position is checked
  * @return {@code true} if the object found after moving to the parent is the very same object
  *         as {@code document.getDocument().getBody()}
  */
 boolean isCursorInBody(XWPFDocument document, XmlCursor cursor) {
  XmlCursor verify = cursor.newCursor();
  try {
   verify.toParent();
   // Reference comparison: true only when the parent object is the body object itself.
   return verify.getObject() == document.getDocument().getBody();
  } finally {
   // Release the temporary cursor even if one of the calls above throws.
   verify.dispose();
  }
 }
 
 /**
  * Inserts a {@code w:altChunk} element at the position of {@code cursor} and gives it an
  * {@code r:id} attribute holding the id of {@code myXWPFHtmlDocument}.
  *
  * <p>Nothing is inserted when {@code isCursorInBody} reports that the cursor is not directly
  * inside the document body. In every case the cursor is disposed before this method returns,
  * so the caller must not use it afterwards.
  *
  * @param document           the document that receives the altChunk
  * @param myXWPFHtmlDocument the HTML part whose id the altChunk refers to
  * @param cursor             the insert position; ownership passes to this method
  */
 void insertAltChunk(XWPFDocument document, MyXWPFHtmlDocument myXWPFHtmlDocument, XmlCursor cursor) {
  try {
   if (isCursorInBody(document, cursor)) {
    QName altChunkName = new QName("http://schemas.openxmlformats.org/wordprocessingml/2006/main", "altChunk");
    QName relationIdName = new QName("http://schemas.openxmlformats.org/officeDocument/2006/relationships", "id");
    cursor.beginElement(altChunkName);
    cursor.insertAttributeWithValue(relationIdName, myXWPFHtmlDocument.getId());
   }
  } finally {
   // Previously the cursor was only disposed after a successful insert and leaked otherwise;
   // the void return gave callers no way to tell which case had happened.
   cursor.dispose();
  }
 }
...

完整示例(基于我此前给出的代码):它在现有 Word 文档的第二段和第五段之前插入 altChunk。

import java.io.*;

import org.apache.poi.*;
import org.apache.poi.ooxml.*;
import org.apache.poi.openxml4j.opc.*;

import org.apache.poi.xwpf.usermodel.*;

import org.apache.xmlbeans.XmlCursor;
import javax.xml.namespace.QName;

/**
 * Example program that inserts HTML into an existing Word document at chosen positions.
 *
 * <p>Each HTML fragment is stored as its own part ({@code /word/htmlDoc#.html}) in the
 * {@code *.docx} ZIP archive, related to the document, and referenced from the body by a
 * {@code w:altChunk} element that is inserted at an {@link XmlCursor} position.
 */
public class InsertHTMLaltChunkInWordAtCursor {

 /** Qualified name of the {@code altChunk} element in the WordprocessingML main namespace. */
 private static final QName ALT_CHUNK =
  new QName("http://schemas.openxmlformats.org/wordprocessingml/2006/main", "altChunk");

 /** Qualified name of the {@code id} attribute in the office-document relationships namespace. */
 private static final QName RELATIONSHIP_ID =
  new QName("http://schemas.openxmlformats.org/officeDocument/2006/relationships", "id");

 /**
  * Creates the HTML part {@code /word/<id>.html} in the package of {@code document} and
  * registers a relation from the document to it.
  *
  * @param document the document whose package receives the new part
  * @param id       part base name and relation id, e.g. {@code htmlDoc1}
  * @return the wrapper around the newly created part
  * @throws Exception if the part name is invalid or the part cannot be created
  */
 private static MyXWPFHtmlDocument createHtmlDoc(XWPFDocument document, String id) throws Exception {
  OPCPackage oPCPackage = document.getPackage();
  PackagePartName partName = PackagingURIHelper.createPartName("/word/" + id + ".html");
  PackagePart part = oPCPackage.createPart(partName, "text/html");
  MyXWPFHtmlDocument myXWPFHtmlDocument = new MyXWPFHtmlDocument(part, id);
  document.addRelation(myXWPFHtmlDocument.getId(), new XWPFHtmlRelation(), myXWPFHtmlDocument);
  return myXWPFHtmlDocument;
 }

 /**
  * Reports whether {@code cursor} is positioned directly inside the body of {@code document}.
  *
  * <p>The check runs on a second cursor obtained from {@code cursor.newCursor()};
  * {@code cursor} itself is not moved or disposed.
  *
  * @return {@code true} if the object found after moving to the parent is the very same object
  *         as the document's body
  */
 private static boolean isCursorInBody(XWPFDocument document, XmlCursor cursor) {
  XmlCursor verify = cursor.newCursor();
  try {
   verify.toParent();
   // Reference comparison: true only when the parent object is the body object itself.
   return verify.getObject() == document.getDocument().getBody();
  } finally {
   // Release the temporary cursor even if one of the calls above throws.
   verify.dispose();
  }
 }

 /**
  * Inserts a {@code w:altChunk} element at the position of {@code cursor} and gives it an
  * {@code r:id} attribute holding the id of {@code myXWPFHtmlDocument}.
  *
  * <p>Nothing is inserted when the cursor is not directly inside the document body. The cursor
  * is always disposed before this method returns, so callers must not use it afterwards.
  *
  * @param document           the document that receives the altChunk
  * @param myXWPFHtmlDocument the HTML part whose id the altChunk refers to
  * @param cursor             the insert position; ownership passes to this method
  */
 private static void insertAltChunk(XWPFDocument document, MyXWPFHtmlDocument myXWPFHtmlDocument, XmlCursor cursor) {
  try {
   if (isCursorInBody(document, cursor)) {
    cursor.beginElement(ALT_CHUNK);
    cursor.insertAttributeWithValue(RELATIONSHIP_ID, myXWPFHtmlDocument.getId());
   }
  } finally {
   // Dispose on every path; before, a cursor outside the body was never released.
   cursor.dispose();
  }
 }

 /**
  * Returns a new cursor obtained from the paragraph at {@code index}.
  *
  * @param document the document to look the paragraph up in
  * @param index    zero-based index passed to {@code getParagraphArray}
  * @return a new cursor created from the paragraph's underlying {@code CTP}
  * @throws IllegalStateException if {@code getParagraphArray} yields no paragraph for the index
  */
 private static XmlCursor cursorAtParagraph(XWPFDocument document, int index) {
  XWPFParagraph paragraph = document.getParagraphArray(index);
  if (paragraph == null) {
   // Fail with a clear message instead of a bare NullPointerException on getCTP().
   throw new IllegalStateException("Document has no paragraph at index " + index);
  }
  return paragraph.getCTP().newCursor();
 }

 /**
  * Reads {@code ./WordDocument.docx}, inserts one HTML chunk at the second paragraph and one at
  * the fifth paragraph, and writes the result to {@code InsertHTMLaltChunkInWordAtCursor.docx}.
  *
  * @param args unused
  * @throws Exception if reading, modifying or writing the document fails
  */
 public static void main(String[] args) throws Exception {

  // try-with-resources: the input stream and the document are closed on every path.
  try (InputStream in = new FileInputStream("./WordDocument.docx");
       XWPFDocument document = new XWPFDocument(in)) {

   MyXWPFHtmlDocument myXWPFHtmlDocument;

   myXWPFHtmlDocument = createHtmlDoc(document, "htmlDoc1");
   myXWPFHtmlDocument.setHtml(myXWPFHtmlDocument.getHtml().replace("<body></body>",
    "<body><p>Simple <b>HTML</b> <i>formatted</i> <u>text</u></p></body>"));
   insertAltChunk(document, myXWPFHtmlDocument, cursorAtParagraph(document, 1));

   myXWPFHtmlDocument = createHtmlDoc(document, "htmlDoc2");
   myXWPFHtmlDocument.setHtml(myXWPFHtmlDocument.getHtml().replace("<body></body>",
    "<body>" +
    "<table border=\"1\">"+
    "<caption>A table</caption>" +
    "<tr><th>Name</th><th>Date</th><th>Amount</th></tr>" +
    "<tr><td>John Doe</td><td>2018-12-01</td><td>1,234.56</td></tr>" +
    "</table>" +
    "</body>"
    ));
   insertAltChunk(document, myXWPFHtmlDocument, cursorAtParagraph(document, 4));

   try (OutputStream out = new FileOutputStream("InsertHTMLaltChunkInWordAtCursor.docx")) {
    document.write(out);
   }
  }

 }

 /**
  * Wrapper for the HTML part {@code /word/htmlDoc#.html} in the {@code *.docx} ZIP archive.
  * Holds the HTML as a string and writes it into the part in {@link #commit()}.
  */
 //TODO: We should *not* be using String methods for manipulating HTML!
 private static class MyXWPFHtmlDocument extends POIXMLDocumentPart {

  private String html;
  private final String id;

  /**
   * @param part the package part that will receive the HTML
   * @param id   the id reported by {@link #getId()}
   */
  private MyXWPFHtmlDocument(PackagePart part, String id) throws Exception {
   super(part);
   // Skeleton with an empty body; callers replace "<body></body>" with their content.
   this.html = "<!DOCTYPE html><html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\"><style></style><title>HTML import</title></head><body></body>";
   this.id = id;
  }

  /** @return the id given at construction */
  private String getId() {
   return id;
  }

  /** @return the current HTML text */
  private String getHtml() {
   return html;
  }

  /** @param html the HTML text to write when the part is committed */
  private void setHtml(String html) {
   this.html = html;
  }

  /**
   * Writes the current HTML text, encoded as UTF-8, into the package part.
   *
   * @throws IOException if the part's output stream cannot be opened or written
   */
  @Override
  protected void commit() throws IOException {
   PackagePart part = getPackagePart();
   // Both resources are closed even when write() fails.
   try (OutputStream out = part.getOutputStream();
        Writer writer = new OutputStreamWriter(out, java.nio.charset.StandardCharsets.UTF_8)) {
    writer.write(html);
   }
  }

 }

 /** The relation description for {@code /word/htmlDoc#.html}. */
 private final static class XWPFHtmlRelation extends POIXMLRelation {
  private XWPFHtmlRelation() {
   super(
    "text/html", 
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/aFChunk", 
    "/word/htmlDoc#.html");
  }
 }
}

另见