Apache Poi XWPF - 我们如何将 docx 分成两部分?
Apache Poi XWPF - How do we split a docx into two sections?
我有一个现有文档(以字节数组形式保存),我使用以下代码将其解析为 XWPFDocument:
// Wrap the raw document bytes in an in-memory stream.
InputStream is = new ByteArrayInputStream(docuByte);
// Open the stream as an OPC package and build the XWPF document model from it.
XWPFDocument docx = new XWPFDocument(OPCPackage.open(is));
这份文档至少有 5 页。我打算让前两页(标题页和目录页)使用空白页脚,并从第三页开始设置页脚。
为此,我知道需要将文档分成两个不同的节(section)。
section 1 - first and second page
section 2 - third page and up
但是,我找不到可以将文档分成两个节的方法。有人知道如何实现吗?
XWPFDocument 到目前为止还没有提供添加分节符的专用方法,所以需要使用底层的 org.openxmlformats.schemas.wordprocessingml.x2006.main.* 类。
在 Office Open XML Word 文档(*.docx)中,分节符是一个在段落属性中带有节属性设置的段落。因此需要做的就是在文档中插入这样一个段落。要插入段落,XWPFDocument 提供了方法 insertNewParagraph(org.apache.xmlbeans.XmlCursor cursor)。但要获得该光标位置,需要知道段落应插入何处;例如,可以是某个包含特定文本的现有段落。
插入的节属性作用于该段落上方的那一节。
文档正文(body)自身也带有节属性,它们作用于最后一节。
以下代码说明了这一点。它先搜索包含特定文本的段落,然后在找到的段落之前插入一个带有节属性的段落,这些节属性是原先最后一节的节属性的副本。接着它从新插入的节属性中删除所有页眉/页脚(header/footer)设置。这样,新插入段落上方的那一节就没有页眉/页脚设置,而原有的页眉/页脚设置保留在最后一节中。
import java.io.*;
import org.apache.poi.xwpf.usermodel.*;
/**
 * Demonstrates how to split a Word document into two sections with Apache POI XWPF.
 *
 * <p>A section break is a paragraph whose paragraph properties carry section properties
 * ({@code sectPr}). The program inserts such a paragraph in front of a marker paragraph, copies
 * the section properties that applied so far into it, and then strips all header/footer
 * references from the copy so that the section above the break has no headers or footers.
 */
public class WordInsertSectionbreak {

    /**
     * Returns the section properties stored directly in the document body.
     *
     * @param document the document to inspect
     * @return the body-level section properties, or {@code null} if the body has none
     */
    static org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr getDocumentBodySectPr(XWPFDocument document) {
        org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody ctBody = document.getDocument().getBody();
        return (ctBody == null) ? null : ctBody.getSectPr();
    }

    /**
     * Finds the section properties that govern the section containing the given paragraph.
     *
     * <p>Starting at the given paragraph, the body paragraphs are scanned forward for the first
     * one carrying section properties. If none is found, the section properties of the document
     * body are returned.
     *
     * @param paragraph the paragraph to start the search from
     * @return the section properties found, or {@code null} if neither a following paragraph nor
     *         the document body has any
     */
    static org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr getNextSectPr(XWPFParagraph paragraph) {
        XWPFDocument document = paragraph.getDocument();
        java.util.List<XWPFParagraph> paragraphs = document.getParagraphs();
        // Index into the paragraph list itself. XWPFDocument.getPosOfParagraph returns the
        // position among ALL body elements (tables included), which does not match the index
        // space of the paragraph list once a table precedes the paragraph.
        int start = paragraphs.indexOf(paragraph);
        if (start >= 0) {
            for (int p = start; p < paragraphs.size(); p++) {
                org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPPr ctPPr = paragraphs.get(p).getCTP().getPPr();
                if (ctPPr != null && ctPPr.getSectPr() != null) {
                    return ctPPr.getSectPr();
                }
            }
        }
        // Not found in a paragraph (or the paragraph is not a direct body paragraph):
        // fall back to the section properties of the document body.
        return getDocumentBodySectPr(document);
    }

    /**
     * Inserts a section-break paragraph at the cursor position.
     *
     * <p>The new paragraph receives a copy of the section properties that applied at that
     * position before the insertion, so the section above the break initially looks the same as
     * the section below it.
     *
     * @param document the document to modify
     * @param cursor   a cursor positioned where the paragraph shall be inserted
     * @return the inserted section-break paragraph, or {@code null} if no paragraph could be
     *         inserted at the cursor or no section properties were available to copy (in which
     *         case the document is left without the extra paragraph)
     */
    static XWPFParagraph insertSectionbreak(XWPFDocument document, org.apache.xmlbeans.XmlCursor cursor) {
        XWPFParagraph paragraph = document.insertNewParagraph(cursor);
        if (paragraph == null) {
            // The cursor was not at a position where a body paragraph can be inserted.
            return null;
        }
        org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr ctSectPrNextSect = getNextSectPr(paragraph);
        if (ctSectPrNextSect == null) {
            // Nothing to copy: do not leave a stray empty paragraph behind.
            document.removeBodyElement(document.getPosOfParagraph(paragraph));
            return null;
        }
        org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr ctSectPrNewSect =
                (org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr) ctSectPrNextSect.copy();
        paragraph.getCTP().addNewPPr().setSectPr(ctSectPrNewSect);
        return paragraph;
    }

    /**
     * Returns the first body paragraph whose text contains the given text.
     *
     * @param document the document to search
     * @param text     the text to look for
     * @return the first matching paragraph, or {@code null} if there is none or {@code text} is
     *         {@code null}
     */
    static XWPFParagraph getParagraphByText(XWPFDocument document, String text) {
        if (text == null) {
            return null;
        }
        for (XWPFParagraph paragraph : document.getParagraphs()) {
            if (paragraph.getText().contains(text)) {
                return paragraph;
            }
        }
        return null;
    }

    /**
     * Removes all header and footer references from the section properties of a section-break
     * paragraph. Does nothing if the paragraph is {@code null} or carries no section properties.
     *
     * @param sectionBreakParagraph the paragraph holding the section properties to clean
     */
    static void removeHeadersAndFooters(XWPFParagraph sectionBreakParagraph) {
        if (sectionBreakParagraph == null) {
            return;
        }
        org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPPr ctPPr = sectionBreakParagraph.getCTP().getPPr();
        if (ctPPr == null || ctPPr.getSectPr() == null) {
            return;
        }
        org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr ctSectPr = ctPPr.getSectPr();
        // Remove from the end so the remaining indices stay valid.
        for (int i = ctSectPr.getHeaderReferenceArray().length - 1; i >= 0; i--) {
            ctSectPr.removeHeaderReference(i);
        }
        for (int i = ctSectPr.getFooterReferenceArray().length - 1; i >= 0; i--) {
            ctSectPr.removeFooterReference(i);
        }
    }

    /**
     * Reads {@code ./WordDocument.docx}, inserts a header/footer-free section break in front of
     * the marker paragraph (if present) and writes the result to {@code ./WordDocumentResult.docx}.
     *
     * @param args unused
     * @throws Exception if reading, modifying or writing the document fails
     */
    public static void main(String[] args) throws Exception {
        try (FileInputStream in = new FileInputStream("./WordDocument.docx");
             XWPFDocument document = new XWPFDocument(in)) {

            XWPFParagraph paragraph = getParagraphByText(document, "Some text to mark where section break shall be inserted");
            if (paragraph != null) {
                org.apache.xmlbeans.XmlCursor cursor = paragraph.getCTP().newCursor();
                try {
                    XWPFParagraph sectionBreakParagraph = insertSectionbreak(document, cursor);
                    if (sectionBreakParagraph != null) {
                        removeHeadersAndFooters(sectionBreakParagraph);
                    }
                } finally {
                    // Release the cursor once the insertion is done.
                    cursor.dispose();
                }
            }

            try (FileOutputStream out = new FileOutputStream("./WordDocumentResult.docx")) {
                document.write(out);
            }
        }
    }
}
该代码已使用当前的 Apache POI 5.2.2 测试通过。
我有一个现有文档(以字节数组形式保存),我使用以下代码将其解析为 XWPFDocument:
InputStream is = new ByteArrayInputStream(docuByte);
// Open the stream as an OPC package and build the XWPF document model from it.
XWPFDocument docx = new XWPFDocument(OPCPackage.open(is));
这份文档至少有 5 页。我打算让前两页(标题页和目录页)使用空白页脚,并从第三页开始设置页脚。
为此,我知道需要将文档分成两个不同的节(section)。
section 1 - first and second page
section 2 - third page and up
但是,我找不到可以将文档分成两个节的方法。有人知道如何实现吗?
XWPFDocument 到目前为止还没有提供添加分节符的专用方法,所以需要使用底层的 org.openxmlformats.schemas.wordprocessingml.x2006.main.* 类。
在 Office Open XML Word 文档(*.docx)中,分节符是一个在段落属性中带有节属性设置的段落。因此需要做的就是在文档中插入这样一个段落。要插入段落,XWPFDocument 提供了方法 insertNewParagraph(org.apache.xmlbeans.XmlCursor cursor)。但要获得该光标位置,需要知道段落应插入何处;例如,可以是某个包含特定文本的现有段落。
插入的节属性作用于该段落上方的那一节。
文档正文(body)自身也带有节属性,它们作用于最后一节。
以下代码说明了这一点。它先搜索包含特定文本的段落,然后在找到的段落之前插入一个带有节属性的段落,这些节属性是原先最后一节的节属性的副本。接着它从新插入的节属性中删除所有页眉/页脚(header/footer)设置。这样,新插入段落上方的那一节就没有页眉/页脚设置,而原有的页眉/页脚设置保留在最后一节中。
import java.io.*;
import org.apache.poi.xwpf.usermodel.*;
/**
 * Demonstrates how to split a Word document into two sections with Apache POI XWPF.
 *
 * <p>A section break is a paragraph whose paragraph properties carry section properties
 * ({@code sectPr}). The program inserts such a paragraph in front of a marker paragraph, copies
 * the section properties that applied so far into it, and then strips all header/footer
 * references from the copy so that the section above the break has no headers or footers.
 */
public class WordInsertSectionbreak {

    /**
     * Returns the section properties stored directly in the document body.
     *
     * @param document the document to inspect
     * @return the body-level section properties, or {@code null} if the body has none
     */
    static org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr getDocumentBodySectPr(XWPFDocument document) {
        org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody ctBody = document.getDocument().getBody();
        return (ctBody == null) ? null : ctBody.getSectPr();
    }

    /**
     * Finds the section properties that govern the section containing the given paragraph.
     *
     * <p>Starting at the given paragraph, the body paragraphs are scanned forward for the first
     * one carrying section properties. If none is found, the section properties of the document
     * body are returned.
     *
     * @param paragraph the paragraph to start the search from
     * @return the section properties found, or {@code null} if neither a following paragraph nor
     *         the document body has any
     */
    static org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr getNextSectPr(XWPFParagraph paragraph) {
        XWPFDocument document = paragraph.getDocument();
        java.util.List<XWPFParagraph> paragraphs = document.getParagraphs();
        // Index into the paragraph list itself. XWPFDocument.getPosOfParagraph returns the
        // position among ALL body elements (tables included), which does not match the index
        // space of the paragraph list once a table precedes the paragraph.
        int start = paragraphs.indexOf(paragraph);
        if (start >= 0) {
            for (int p = start; p < paragraphs.size(); p++) {
                org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPPr ctPPr = paragraphs.get(p).getCTP().getPPr();
                if (ctPPr != null && ctPPr.getSectPr() != null) {
                    return ctPPr.getSectPr();
                }
            }
        }
        // Not found in a paragraph (or the paragraph is not a direct body paragraph):
        // fall back to the section properties of the document body.
        return getDocumentBodySectPr(document);
    }

    /**
     * Inserts a section-break paragraph at the cursor position.
     *
     * <p>The new paragraph receives a copy of the section properties that applied at that
     * position before the insertion, so the section above the break initially looks the same as
     * the section below it.
     *
     * @param document the document to modify
     * @param cursor   a cursor positioned where the paragraph shall be inserted
     * @return the inserted section-break paragraph, or {@code null} if no paragraph could be
     *         inserted at the cursor or no section properties were available to copy (in which
     *         case the document is left without the extra paragraph)
     */
    static XWPFParagraph insertSectionbreak(XWPFDocument document, org.apache.xmlbeans.XmlCursor cursor) {
        XWPFParagraph paragraph = document.insertNewParagraph(cursor);
        if (paragraph == null) {
            // The cursor was not at a position where a body paragraph can be inserted.
            return null;
        }
        org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr ctSectPrNextSect = getNextSectPr(paragraph);
        if (ctSectPrNextSect == null) {
            // Nothing to copy: do not leave a stray empty paragraph behind.
            document.removeBodyElement(document.getPosOfParagraph(paragraph));
            return null;
        }
        org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr ctSectPrNewSect =
                (org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr) ctSectPrNextSect.copy();
        paragraph.getCTP().addNewPPr().setSectPr(ctSectPrNewSect);
        return paragraph;
    }

    /**
     * Returns the first body paragraph whose text contains the given text.
     *
     * @param document the document to search
     * @param text     the text to look for
     * @return the first matching paragraph, or {@code null} if there is none or {@code text} is
     *         {@code null}
     */
    static XWPFParagraph getParagraphByText(XWPFDocument document, String text) {
        if (text == null) {
            return null;
        }
        for (XWPFParagraph paragraph : document.getParagraphs()) {
            if (paragraph.getText().contains(text)) {
                return paragraph;
            }
        }
        return null;
    }

    /**
     * Removes all header and footer references from the section properties of a section-break
     * paragraph. Does nothing if the paragraph is {@code null} or carries no section properties.
     *
     * @param sectionBreakParagraph the paragraph holding the section properties to clean
     */
    static void removeHeadersAndFooters(XWPFParagraph sectionBreakParagraph) {
        if (sectionBreakParagraph == null) {
            return;
        }
        org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPPr ctPPr = sectionBreakParagraph.getCTP().getPPr();
        if (ctPPr == null || ctPPr.getSectPr() == null) {
            return;
        }
        org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr ctSectPr = ctPPr.getSectPr();
        // Remove from the end so the remaining indices stay valid.
        for (int i = ctSectPr.getHeaderReferenceArray().length - 1; i >= 0; i--) {
            ctSectPr.removeHeaderReference(i);
        }
        for (int i = ctSectPr.getFooterReferenceArray().length - 1; i >= 0; i--) {
            ctSectPr.removeFooterReference(i);
        }
    }

    /**
     * Reads {@code ./WordDocument.docx}, inserts a header/footer-free section break in front of
     * the marker paragraph (if present) and writes the result to {@code ./WordDocumentResult.docx}.
     *
     * @param args unused
     * @throws Exception if reading, modifying or writing the document fails
     */
    public static void main(String[] args) throws Exception {
        try (FileInputStream in = new FileInputStream("./WordDocument.docx");
             XWPFDocument document = new XWPFDocument(in)) {

            XWPFParagraph paragraph = getParagraphByText(document, "Some text to mark where section break shall be inserted");
            if (paragraph != null) {
                org.apache.xmlbeans.XmlCursor cursor = paragraph.getCTP().newCursor();
                try {
                    XWPFParagraph sectionBreakParagraph = insertSectionbreak(document, cursor);
                    if (sectionBreakParagraph != null) {
                        removeHeadersAndFooters(sectionBreakParagraph);
                    }
                } finally {
                    // Release the cursor once the insertion is done.
                    cursor.dispose();
                }
            }

            try (FileOutputStream out = new FileOutputStream("./WordDocumentResult.docx")) {
                document.write(out);
            }
        }
    }
}
该代码已使用当前的 Apache POI 5.2.2 测试通过。