如何使用 Apache POI 清空 XWPFDocument (DOCX) 中的所有页眉和页脚?
How to empty all headers and footers in an XWPFDocument (DOCX) using Apache POI?
以下 Java 代码已成功删除某个 DOCX 文件中页眉和页脚的所有内容,但有一个页脚除外(它是首页页脚)。检查该 DOCX 后发现,这个顽固的页脚对应的 XML 如下所示。应该如何删除它的内容?
// Open the DOCX whose headers and footers are to be emptied.
document = new XWPFDocument(new FileInputStream(filePath));

// Headers: remove every paragraph, then every table.
// Only removeParagraph/removeTable are called here; nothing else in the part is touched.
List<XWPFHeader> headers = document.getHeaderList();
for (XWPFHeader header : headers) {
    // Walk a copy so that removal never happens while iterating the list the header returns.
    List<XWPFParagraph> headerParagraphs = new ArrayList<XWPFParagraph>(header.getParagraphs());
    for (XWPFParagraph paragraph : headerParagraphs) {
        header.removeParagraph(paragraph);
    }

    List<XWPFTable> headerTables = new ArrayList<XWPFTable>(header.getTables());
    for (XWPFTable table : headerTables) {
        header.removeTable(table);
    }
}

// Footers: same procedure as for the headers.
List<XWPFFooter> footers = document.getFooterList();
for (XWPFFooter footer : footers) {
    List<XWPFParagraph> footerParagraphs = new ArrayList<XWPFParagraph>(footer.getParagraphs());
    for (XWPFParagraph paragraph : footerParagraphs) {
        footer.removeParagraph(paragraph);
    }

    List<XWPFTable> footerTables = new ArrayList<XWPFTable>(footer.getTables());
    for (XWPFTable table : footerTables) {
        footer.removeTable(table);
    }
}
footer3.xml:
<?xml version="1.0" encoding="UTF-8"?>
<w:ftr mc:Ignorable="w14 w15 w16se wp14" xmlns:wpc="http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas" xmlns:cx="http://schemas.microsoft.com/office/drawing/2014/chartex" xmlns:cx1="http://schemas.microsoft.com/office/drawing/2015/9/8/chartex" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:wp14="http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing" xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" xmlns:w10="urn:schemas-microsoft-com:office:word" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" xmlns:w16se="http://schemas.microsoft.com/office/word/2015/wordml/symex" xmlns:wpg="http://schemas.microsoft.com/office/word/2010/wordprocessingGroup" xmlns:wpi="http://schemas.microsoft.com/office/word/2010/wordprocessingInk" xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml" xmlns:wps="http://schemas.microsoft.com/office/word/2010/wordprocessingShape">
<w:sdt>
<w:sdtPr>
<w:rPr>
<w:rFonts w:cs="Arial" />
<w:color w:val="0000FF" />
<w:sz w:val="16" />
<w:szCs w:val="16" />
<w:lang w:val="en_US" />
</w:rPr>
<w:id w:val="6695195" />
<w:placeholder>
<w:docPart w:val="68B9E76BF9434A3FAABE5342BB8B54F7" />
</w:placeholder>
</w:sdtPr>
<w:sdtEndPr />
<w:sdtContent>
<w:p w:rsidR="00A47874" w:rsidRPr="004D34A5" w:rsidRDefault="00945F6E" w:rsidP="00A47874">
<w:pPr>
<w:pBdr>
<w:top w:val="single" w:sz="4" w:space="1" w:color="auto" />
</w:pBdr>
<w:rPr>
<w:rFonts w:cs="Arial" />
<w:color w:val="0000FF" />
<w:sz w:val="16" />
<w:szCs w:val="16" />
<w:lang w:val="en_US" />
</w:rPr>
</w:pPr>
<w:r>
<w:rPr>
<w:rFonts w:cs="Arial" />
<w:color w:val="0000FF" />
<w:sz w:val="16" />
<w:szCs w:val="16" />
<w:lang w:val="en_US" />
</w:rPr>
<w:t>Some text that couldn't be removed</w:t>
</w:r>
</w:p>
</w:sdtContent>
</w:sdt>
</w:ftr>
您页脚中的 w:sdt 是 StructuredDocumentTag,又名 ContentControl。
对此,Apache POI 目前只有一个实验性的类 XWPFSDT。
虽然 Apache POI 提供了 removeParagraph 和 removeTable,但 XWPFHeaderFooter 和 XWPFDocument 中都还缺少 removeSDT。
因此,使用您的方法无法从页脚中删除 StructuredDocumentTag。
但是,如果需要完全清空所有现有的页眉和页脚,可以使用 XWPFHeaderFooter.setHeaderFooter,直接用新的空内容覆盖所有页眉和页脚的内容。
示例:
import java.io.FileInputStream;
import java.io.FileOutputStream;
import org.apache.poi.xwpf.usermodel.*;
/**
 * Empties all headers and footers of a DOCX file.
 *
 * <p>Each header and footer returned by the document has its content replaced with a
 * newly created, empty {@code CTHdrFtr}, and the document is then written to a new file.
 */
public class WordDoEmptyingHeaderFooter {

    /**
     * Reads {@code ./WordDocument.docx}, replaces the content of every header and footer
     * with an empty {@code CTHdrFtr}, and writes the result to {@code ./WordDocumentNew.docx}.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if opening the input, modifying the document, or writing the output fails
     */
    public static void main(String[] args) throws Exception {
        String inFilePath = "./WordDocument.docx";
        String outFilePath = "./WordDocumentNew.docx";

        // try-with-resources closes the input stream and the document even when an
        // exception is thrown part-way through.
        try (FileInputStream in = new FileInputStream(inFilePath);
             XWPFDocument document = new XWPFDocument(in)) {

            // Overwrite the content of each header with a fresh, empty instance.
            for (XWPFHeader header : document.getHeaderList()) {
                header.setHeaderFooter(
                        org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHdrFtr.Factory.newInstance());
            }

            // Same for each footer.
            for (XWPFFooter footer : document.getFooterList()) {
                footer.setHeaderFooter(
                        org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHdrFtr.Factory.newInstance());
            }

            // The output stream is closed as soon as the write finishes or fails.
            try (FileOutputStream out = new FileOutputStream(outFilePath)) {
                document.write(out);
            }
        }
    }
}
以下 Java 代码已成功删除某个 DOCX 文件中页眉和页脚的所有内容,但有一个页脚除外(它是首页页脚)。检查该 DOCX 后发现,这个顽固的页脚对应的 XML 如下所示。应该如何删除它的内容?
// Open the DOCX whose headers and footers are to be emptied.
document = new XWPFDocument(new FileInputStream(filePath));

// Headers: remove every paragraph, then every table.
// Only removeParagraph/removeTable are called here; nothing else in the part is touched.
List<XWPFHeader> headers = document.getHeaderList();
for (XWPFHeader header : headers) {
    // Walk a copy so that removal never happens while iterating the list the header returns.
    List<XWPFParagraph> headerParagraphs = new ArrayList<XWPFParagraph>(header.getParagraphs());
    for (XWPFParagraph paragraph : headerParagraphs) {
        header.removeParagraph(paragraph);
    }

    List<XWPFTable> headerTables = new ArrayList<XWPFTable>(header.getTables());
    for (XWPFTable table : headerTables) {
        header.removeTable(table);
    }
}

// Footers: same procedure as for the headers.
List<XWPFFooter> footers = document.getFooterList();
for (XWPFFooter footer : footers) {
    List<XWPFParagraph> footerParagraphs = new ArrayList<XWPFParagraph>(footer.getParagraphs());
    for (XWPFParagraph paragraph : footerParagraphs) {
        footer.removeParagraph(paragraph);
    }

    List<XWPFTable> footerTables = new ArrayList<XWPFTable>(footer.getTables());
    for (XWPFTable table : footerTables) {
        footer.removeTable(table);
    }
}
footer3.xml:
<?xml version="1.0" encoding="UTF-8"?>
<w:ftr mc:Ignorable="w14 w15 w16se wp14" xmlns:wpc="http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas" xmlns:cx="http://schemas.microsoft.com/office/drawing/2014/chartex" xmlns:cx1="http://schemas.microsoft.com/office/drawing/2015/9/8/chartex" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:wp14="http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing" xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" xmlns:w10="urn:schemas-microsoft-com:office:word" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" xmlns:w16se="http://schemas.microsoft.com/office/word/2015/wordml/symex" xmlns:wpg="http://schemas.microsoft.com/office/word/2010/wordprocessingGroup" xmlns:wpi="http://schemas.microsoft.com/office/word/2010/wordprocessingInk" xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml" xmlns:wps="http://schemas.microsoft.com/office/word/2010/wordprocessingShape">
<w:sdt>
<w:sdtPr>
<w:rPr>
<w:rFonts w:cs="Arial" />
<w:color w:val="0000FF" />
<w:sz w:val="16" />
<w:szCs w:val="16" />
<w:lang w:val="en_US" />
</w:rPr>
<w:id w:val="6695195" />
<w:placeholder>
<w:docPart w:val="68B9E76BF9434A3FAABE5342BB8B54F7" />
</w:placeholder>
</w:sdtPr>
<w:sdtEndPr />
<w:sdtContent>
<w:p w:rsidR="00A47874" w:rsidRPr="004D34A5" w:rsidRDefault="00945F6E" w:rsidP="00A47874">
<w:pPr>
<w:pBdr>
<w:top w:val="single" w:sz="4" w:space="1" w:color="auto" />
</w:pBdr>
<w:rPr>
<w:rFonts w:cs="Arial" />
<w:color w:val="0000FF" />
<w:sz w:val="16" />
<w:szCs w:val="16" />
<w:lang w:val="en_US" />
</w:rPr>
</w:pPr>
<w:r>
<w:rPr>
<w:rFonts w:cs="Arial" />
<w:color w:val="0000FF" />
<w:sz w:val="16" />
<w:szCs w:val="16" />
<w:lang w:val="en_US" />
</w:rPr>
<w:t>Some text that couldn't be removed</w:t>
</w:r>
</w:p>
</w:sdtContent>
</w:sdt>
</w:ftr>
您页脚中的 w:sdt 是 StructuredDocumentTag,又名 ContentControl。
对此,Apache POI 目前只有一个实验性的类 XWPFSDT。
虽然 Apache POI 提供了 removeParagraph 和 removeTable,但 XWPFHeaderFooter 和 XWPFDocument 中都还缺少 removeSDT。
因此,使用您的方法无法从页脚中删除 StructuredDocumentTag。
但是,如果需要完全清空所有现有的页眉和页脚,可以使用 XWPFHeaderFooter.setHeaderFooter,直接用新的空内容覆盖所有页眉和页脚的内容。
示例:
import java.io.FileInputStream;
import java.io.FileOutputStream;
import org.apache.poi.xwpf.usermodel.*;
/**
 * Empties all headers and footers of a DOCX file.
 *
 * <p>Each header and footer returned by the document has its content replaced with a
 * newly created, empty {@code CTHdrFtr}, and the document is then written to a new file.
 */
public class WordDoEmptyingHeaderFooter {

    /**
     * Reads {@code ./WordDocument.docx}, replaces the content of every header and footer
     * with an empty {@code CTHdrFtr}, and writes the result to {@code ./WordDocumentNew.docx}.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if opening the input, modifying the document, or writing the output fails
     */
    public static void main(String[] args) throws Exception {
        String inFilePath = "./WordDocument.docx";
        String outFilePath = "./WordDocumentNew.docx";

        // try-with-resources closes the input stream and the document even when an
        // exception is thrown part-way through.
        try (FileInputStream in = new FileInputStream(inFilePath);
             XWPFDocument document = new XWPFDocument(in)) {

            // Overwrite the content of each header with a fresh, empty instance.
            for (XWPFHeader header : document.getHeaderList()) {
                header.setHeaderFooter(
                        org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHdrFtr.Factory.newInstance());
            }

            // Same for each footer.
            for (XWPFFooter footer : document.getFooterList()) {
                footer.setHeaderFooter(
                        org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHdrFtr.Factory.newInstance());
            }

            // The output stream is closed as soon as the write finishes or fails.
            try (FileOutputStream out = new FileOutputStream(outFilePath)) {
                document.write(out);
            }
        }
    }
}