如何使用 Apache POI 从 .docx 文件中检索水印文本?
How to retrieve watermark text from .docx file using Apache POI?
如何使用 Apache POI 从 .docx 文件中获取水印文本
在 API 文档中,我看到了 createWatermark(String text) 方法,
但找不到用于读取水印的对应 getter 方法。
/** File handle for the document located at the path given to the constructor. */
private File file;

/**
 * Creates a parser for the document stored at the given path.
 *
 * @param filePath   path of the document; it is wrapped in a {@link File}
 * @param dataSource forwarded unchanged to the superclass constructor
 */
public MSDocParser(String filePath, DataSource dataSource) {
    super(dataSource);
    final File source = new File(filePath);
    this.file = source;
}
/**
 * Creates a text extractor for {@code file} and, when it is an
 * {@code XWPFWordExtractor}, looks up the header/footer policy of the
 * underlying document.
 *
 * @param task not read by the visible body
 * @throws ParserException declared by the signature
 */
public void parse(RunnableTask task) throws ParserException {
    textExtractor = ExtractorFactory.createExtractor(file);
    // Nothing to do for extractors that are not backed by an XWPF document.
    if (!(textExtractor instanceof XWPFWordExtractor)) {
        return;
    }
    XWPFDocument document = (XWPFDocument) textExtractor.getDocument();
    XWPFHeaderFooterPolicy headerFooterPolicy = document.getHeaderFooterPolicy();
    // I want to print the watermark text here.
}
这是从文档中获取文本水印的最简单方法。
public String getWaterMark(XWPFDocument document) {
var sbWaterMark = new StringBuilder();
try {
XWPFHeader defaultHeader = document.getHeaderFooterPolicy().getDefaultHeader();
var declareNameSpaces = "declare namespace v='urn:schemas-microsoft-com:vml';";
final var xpathFilter = "*//v:shape/v:textpath/@string";
// a “watermark” in Word is nothing more than a graphic anchored to the header.
XmlObject[] xmlobjects = defaultHeader._getHdrFtr().selectPath(declareNameSpaces + xpathFilter);
if(xmlobjects != null && xmlobjects.length > 0) {
for (var xmlobj: xmlobjects {
sbWaterMark.append(
xmlobj.getDomNode().getNodeValue()).append("\n");
}
}
return sbWaterMark.toString();
} catch (NullPointerException ex) {
return sbWaterMark.toString();
} catch (Exception ex) {
logAggregator.error("Error while getting Watermark content from document: ", ex);
}
}
如何使用 Apache POI 从 .docx 文件中获取水印文本
在 API 文档中,我看到了 createWatermark(String text) 方法,
但找不到用于读取水印的对应 getter 方法。
/** File handle for the document located at the path given to the constructor. */
private File file;

/**
 * Creates a parser for the document stored at the given path.
 *
 * @param filePath   path of the document; it is wrapped in a {@link File}
 * @param dataSource forwarded unchanged to the superclass constructor
 */
public MSDocParser(String filePath, DataSource dataSource) {
    super(dataSource);
    final File source = new File(filePath);
    this.file = source;
}
/**
 * Creates a text extractor for {@code file} and, when it is an
 * {@code XWPFWordExtractor}, looks up the header/footer policy of the
 * underlying document.
 *
 * @param task not read by the visible body
 * @throws ParserException declared by the signature
 */
public void parse(RunnableTask task) throws ParserException {
    textExtractor = ExtractorFactory.createExtractor(file);
    // Nothing to do for extractors that are not backed by an XWPF document.
    if (!(textExtractor instanceof XWPFWordExtractor)) {
        return;
    }
    XWPFDocument document = (XWPFDocument) textExtractor.getDocument();
    XWPFHeaderFooterPolicy headerFooterPolicy = document.getHeaderFooterPolicy();
    // I want to print the watermark text here.
}
这是从文档中获取文本水印的最简单方法。
public String getWaterMark(XWPFDocument document) {
var sbWaterMark = new StringBuilder();
try {
XWPFHeader defaultHeader = document.getHeaderFooterPolicy().getDefaultHeader();
var declareNameSpaces = "declare namespace v='urn:schemas-microsoft-com:vml';";
final var xpathFilter = "*//v:shape/v:textpath/@string";
// a “watermark” in Word is nothing more than a graphic anchored to the header.
XmlObject[] xmlobjects = defaultHeader._getHdrFtr().selectPath(declareNameSpaces + xpathFilter);
if(xmlobjects != null && xmlobjects.length > 0) {
for (var xmlobj: xmlobjects {
sbWaterMark.append(
xmlobj.getDomNode().getNodeValue()).append("\n");
}
}
return sbWaterMark.toString();
} catch (NullPointerException ex) {
return sbWaterMark.toString();
} catch (Exception ex) {
logAggregator.error("Error while getting Watermark content from document: ", ex);
}
}