Apache POI 未读取值 "name" 和 "surname"
The values "name" and "surname" aren't read by Apache POI
我的目的是读取文件 docx
并获取此文本“#name#”和“#surname#”并将值更改为另一个随意文本:
这是我的 docx
文件:
我这样做:
// Open the document; the real path is elided ("...") in this snippet.
XWPFDocument docx = new XWPFDocument(OPCPackage.open("..."));

// Pass 1: the paragraphs returned by XWPFDocument.getParagraphs().
for (XWPFParagraph p : docx.getParagraphs()) {
    List<XWPFRun> runs = p.getRuns();
    if (runs != null) {
        for (XWPFRun r : runs) {
            String text = r.getText(0);
            // Only a run whose text at index 0 both starts and ends with '#' is rewritten.
            if (text != null && text.startsWith("#") && text.endsWith("#")) {
                // Every '#' in the text is replaced by "new ".
                text = text.replace("#", "new ");
                r.setText(text, 0);
            }
        }
    }
}

// Pass 2: the same per-run check for paragraphs inside the cells of the tables
// returned by XWPFDocument.getTables().
for (XWPFTable tbl : docx.getTables()) {
    for (XWPFTableRow row : tbl.getRows()) {
        for (XWPFTableCell cell : row.getTableCells()) {
            for (XWPFParagraph p : cell.getParagraphs()) {
                for (XWPFRun r : p.getRuns()) {
                    String text = r.getText(0);
                    if (text != null && text.startsWith("#") && text.endsWith("#")) {
                        text = text.replace("#", "new ");
                        r.setText(text, 0);
                    }
                }
            }
        }
    }
} // closes the table loop; this brace was missing in the snippet as posted
问题是我的代码读取了 docx
文件中的所有标签,但没有读取标签“#surname#”和“#name#”。谁能帮帮我?
从你的屏幕截图来看,“#name#”和“#surname#”似乎不直接在文档正文中,而是在绘图中(例如文本框或形状)。 XWPFDocument.getParagraphs
或 .getTables
或 apache poi
中的任何其他高级方法均未涵盖此类元素。所以你的主要问题是你的代码根本没有遍历包含你的文本的段落。
从文档正文中真正获取所有段落的唯一方法是使用 XmlCursor
直接从 XML
中选择所有 w:p
元素。
下面的代码显示了这一点。它使用 XmlCursor
遍历文档正文中的所有 XWPFParagraphs
并在找到时替换文本。
对于替换过程,我更喜欢 TextSegment
替换方法。这是必要的,因为即使包含的段落被遍历,由于格式、拼写检查或任何其他奇怪的原因,文本可能会在不同的文本运行中分开。 Microsoft Word
知道将文本奇怪地分成不同文本运行的近乎无限的理由。
import java.io.*;
import org.apache.poi.xwpf.usermodel.*;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;
import org.apache.xmlbeans.XmlObject;
import org.apache.xmlbeans.XmlCursor;
import java.util.Map;
import java.util.HashMap;
import java.util.List;
import java.util.ArrayList;
/**
 * Replaces placeholder texts in the paragraphs of a Word document.
 *
 * <p>Paragraphs are collected by selecting every {@code w:p} element below the document
 * root with an XPath query rather than through {@code XWPFDocument.getParagraphs()}, so
 * paragraphs nested inside other elements are visited as well. Matching works across run
 * boundaries because a placeholder may be split over several runs.
 */
public class WordReplaceTextSegment {

    /** XPath prolog declaring the {@code w} prefix for the WordprocessingML main namespace. */
    private static final String W_NAMESPACE_DECL =
        "declare namespace w='http://schemas.openxmlformats.org/wordprocessingml/2006/main' ";

    /**
     * Searches the paragraph for the first occurrence of {@code searched}, starting at
     * {@code startPos}. The match may span several text elements and several runs.
     *
     * <p>Side effect: proofing-error elements encountered inside the scanned runs are
     * removed from the XML.
     *
     * @param paragraph the paragraph whose runs are scanned
     * @param searched  the text to look for; must not be empty
     * @param startPos  run, text-element and character index at which the scan starts
     * @return a {@link TextSegment} holding the begin and end positions (run, text element,
     *         character; the end character is inclusive) of the match, or {@code null} if
     *         the text was not found
     */
    static TextSegment searchText(XWPFParagraph paragraph, String searched, PositionInParagraph startPos) {
        int startRun = startPos.getRun(),
            startText = startPos.getText(),
            startChar = startPos.getChar();
        int beginRunPos = 0, candCharPos = 0;
        boolean newList = false;
        // Use getRuns() instead of the raw run array of the paragraph: per the original
        // author's note the raw array does not contain all runs (e.g. hyperlink runs).
        List<XWPFRun> runs = paragraph.getRuns();
        // Declared outside the run loop: a match may begin in an earlier run than it ends in.
        int beginTextPos = 0, beginCharPos = 0;
        for (int runPos = startRun; runPos < runs.size(); runPos++) {
            int textPos = 0, charPos;
            CTR ctRun = runs.get(runPos).getCTR();
            XmlCursor c = ctRun.newCursor();
            c.selectPath("./*"); // all direct children of the run
            try {
                while (c.toNextSelection()) {
                    XmlObject o = c.getObject();
                    if (o instanceof CTText) {
                        if (textPos >= startText) {
                            String candidate = ((CTText) o).getStringValue();
                            if (runPos == startRun) {
                                charPos = startChar;
                            } else {
                                charPos = 0;
                            }
                            for (; charPos < candidate.length(); charPos++) {
                                // Possible start of a match: remember where it begins.
                                if ((candidate.charAt(charPos) == searched.charAt(0)) && (candCharPos == 0)) {
                                    beginTextPos = textPos;
                                    beginCharPos = charPos;
                                    beginRunPos = runPos;
                                    newList = true;
                                }
                                if (candidate.charAt(charPos) == searched.charAt(candCharPos)) {
                                    if (candCharPos + 1 < searched.length()) {
                                        candCharPos++;
                                    } else if (newList) {
                                        // Last character matched: report the segment.
                                        TextSegment segment = new TextSegment();
                                        segment.setBeginRun(beginRunPos);
                                        segment.setBeginText(beginTextPos);
                                        segment.setBeginChar(beginCharPos);
                                        segment.setEndRun(runPos);
                                        segment.setEndText(textPos);
                                        segment.setEndChar(charPos);
                                        return segment;
                                    }
                                } else {
                                    candCharPos = 0; // mismatch: restart matching
                                }
                            }
                        }
                        textPos++;
                    } else if (o instanceof CTProofErr) {
                        c.removeXml(); // drop proofing-error markers
                    } else if (o instanceof CTRPr) {
                        // run properties do not interrupt a match
                    } else {
                        candCharPos = 0; // any other element interrupts a partial match
                    }
                }
            } finally {
                c.dispose();
            }
        }
        return null;
    }

    /**
     * Replaces every occurrence of {@code textToFind} in the paragraph with
     * {@code replacement}, including occurrences that are split over several runs.
     *
     * <p>The replacement is written into the run in which the match begins, the remainder
     * after the match stays in the run in which it ends, and all runs in between are removed.
     * The positions of each match are printed to standard output.
     *
     * @param paragraph   the paragraph to modify
     * @param textToFind  the text to replace; an empty string leaves the paragraph unchanged
     * @param replacement the text to insert instead
     * @throws IllegalArgumentException if {@code replacement} contains {@code textToFind};
     *         every search restarts at the beginning of the paragraph, so such a
     *         replacement would be found and replaced again forever
     */
    static void replaceTextSegment(XWPFParagraph paragraph, String textToFind, String replacement) {
        if (textToFind.isEmpty()) {
            return; // nothing to search for; searchText reads searched.charAt(0)
        }
        if (replacement != null && replacement.contains(textToFind)) {
            throw new IllegalArgumentException(
                "replacement must not contain the text to find: " + textToFind);
        }
        TextSegment foundTextSegment = null;
        PositionInParagraph startPos = new PositionInParagraph(0, 0, 0);
        // search all text segments having text to find
        while ((foundTextSegment = searchText(paragraph, textToFind, startPos)) != null) {
            System.out.println(foundTextSegment.getBeginRun()+":"+foundTextSegment.getBeginText()+":"+foundTextSegment.getBeginChar());
            System.out.println(foundTextSegment.getEndRun()+":"+foundTextSegment.getEndText()+":"+foundTextSegment.getEndChar());

            // maybe there is text before textToFind in begin run
            XWPFRun beginRun = paragraph.getRuns().get(foundTextSegment.getBeginRun());
            String textInBeginRun = beginRun.getText(foundTextSegment.getBeginText());
            String textBefore = textInBeginRun.substring(0, foundTextSegment.getBeginChar());

            // maybe there is text after textToFind in end run
            XWPFRun endRun = paragraph.getRuns().get(foundTextSegment.getEndRun());
            String textInEndRun = endRun.getText(foundTextSegment.getEndText());
            String textAfter = textInEndRun.substring(foundTextSegment.getEndChar() + 1);

            if (foundTextSegment.getEndRun() == foundTextSegment.getBeginRun()) {
                // only one run: text before, then the replacement, then the text after
                textInBeginRun = textBefore + replacement + textAfter;
            } else {
                // several runs: text before plus replacement in the begin run ...
                textInBeginRun = textBefore + replacement;
                // ... and the text after in the end run
                endRun.setText(textAfter, foundTextSegment.getEndText());
            }
            beginRun.setText(textInBeginRun, foundTextSegment.getBeginText());

            // runs between begin run and end run are removed, highest index first so the
            // remaining indices stay valid
            for (int runBetween = foundTextSegment.getEndRun() - 1; runBetween > foundTextSegment.getBeginRun(); runBetween--) {
                paragraph.removeRun(runBetween);
            }
        }
    }

    /**
     * Collects the XML objects of all {@code w:p} elements below the document root that
     * have at least one direct {@code w:r} child containing a {@code w:t} element.
     *
     * @param doc the document to scan
     * @return the matching paragraph XML objects in selection order; never {@code null}
     */
    static List<XmlObject> getCTPObjects(XWPFDocument doc) {
        List<XmlObject> result = new ArrayList<XmlObject>();
        // create cursor selecting all paragraph elements
        XmlCursor cursor = doc.getDocument().newCursor();
        try {
            cursor.selectPath(W_NAMESPACE_DECL + ".//*/w:p");
            while (cursor.toNextSelection()) {
                XmlObject obj = cursor.getObject();
                // add only if the paragraph contains at least a run containing text
                if (obj.selectPath(W_NAMESPACE_DECL + "./w:r/w:t").length > 0) {
                    result.add(obj);
                }
            }
        } finally {
            cursor.dispose(); // release the cursor, as searchText does
        }
        return result;
    }

    /**
     * Applies all replacements to every paragraph found by {@link #getCTPObjects}.
     *
     * <p>Each selected XML object is wrapped in an {@link XWPFParagraph} (parsed from its
     * XML text first if it is not already a {@code CTP}), the replacements are applied, and
     * the resulting paragraph XML is written back into the selected object.
     *
     * @param doc          the document to modify
     * @param replacements maps each text to find to its replacement text
     * @throws Exception if parsing a paragraph's XML fails
     */
    static void traverseAllParagraphsAndReplace(XWPFDocument doc, Map<String, String> replacements) throws Exception {
        // first get all paragraph XML objects out of the stored XML
        List<XmlObject> allCTPObjects = getCTPObjects(doc);
        // then create XWPFParagraphs from them and do the replacing
        for (XmlObject obj : allCTPObjects) {
            CTP ctp = (obj instanceof CTP) ? (CTP) obj : CTP.Factory.parse(obj.xmlText());
            XWPFParagraph paragraph = new XWPFParagraph(ctp, doc);
            for (Map.Entry<String, String> entry : replacements.entrySet()) {
                String textToFind = entry.getKey();
                if (paragraph.getText().contains(textToFind)) {
                    replaceTextSegment(paragraph, textToFind, entry.getValue());
                }
            }
            // write the (possibly re-parsed) paragraph back into the document XML
            obj.set(paragraph.getCTP());
        }
    }

    /**
     * Reads {@code source.docx}, replaces the placeholders and writes {@code result.docx}.
     *
     * @param args unused
     * @throws Exception if reading, replacing or writing fails
     */
    public static void main(String[] args) throws Exception {
        Map<String, String> replacements = new HashMap<String, String>();
        replacements.put("#name#", "Axel");
        // NOTE(review): the accompanying question spells this placeholder "#surname#";
        // confirm the spelling actually used in the document.
        replacements.put("#surename#", "Richter");

        // Nested try/finally so that streams and document are closed even on failure.
        FileInputStream in = new FileInputStream("source.docx");
        try {
            XWPFDocument doc = new XWPFDocument(in);
            try {
                traverseAllParagraphsAndReplace(doc, replacements);
                FileOutputStream out = new FileOutputStream("result.docx");
                try {
                    doc.write(out);
                } finally {
                    out.close();
                }
            } finally {
                doc.close();
            }
        } finally {
            in.close();
        }
    }
}
我的目的是读取文件 docx
并获取此文本“#name#”和“#surname#”并将值更改为另一个随意文本:
这是我的 docx
文件:
我这样做:
// Open the document; the real path is elided ("...") in this snippet.
XWPFDocument docx = new XWPFDocument(OPCPackage.open("..."));

// Pass 1: the paragraphs returned by XWPFDocument.getParagraphs().
for (XWPFParagraph p : docx.getParagraphs()) {
    List<XWPFRun> runs = p.getRuns();
    if (runs != null) {
        for (XWPFRun r : runs) {
            String text = r.getText(0);
            // Only a run whose text at index 0 both starts and ends with '#' is rewritten.
            if (text != null && text.startsWith("#") && text.endsWith("#")) {
                // Every '#' in the text is replaced by "new ".
                text = text.replace("#", "new ");
                r.setText(text, 0);
            }
        }
    }
}

// Pass 2: the same per-run check for paragraphs inside the cells of the tables
// returned by XWPFDocument.getTables().
for (XWPFTable tbl : docx.getTables()) {
    for (XWPFTableRow row : tbl.getRows()) {
        for (XWPFTableCell cell : row.getTableCells()) {
            for (XWPFParagraph p : cell.getParagraphs()) {
                for (XWPFRun r : p.getRuns()) {
                    String text = r.getText(0);
                    if (text != null && text.startsWith("#") && text.endsWith("#")) {
                        text = text.replace("#", "new ");
                        r.setText(text, 0);
                    }
                }
            }
        }
    }
} // closes the table loop; this brace was missing in the snippet as posted
问题是我的代码读取了 docx
文件中的所有标签,但没有读取标签“#surname#”和“#name#”。谁能帮帮我?
从你的屏幕截图来看,“#name#”和“#surname#”似乎不直接在文档正文中,而是在绘图中(例如文本框或形状)。 XWPFDocument.getParagraphs
或 .getTables
或 apache poi
中的任何其他高级方法均未涵盖此类元素。所以你的主要问题是你的代码根本没有遍历包含你的文本的段落。
从文档正文中真正获取所有段落的唯一方法是使用 XmlCursor
直接从 XML
中选择所有 w:p
元素。
下面的代码显示了这一点。它使用 XmlCursor
遍历文档正文中的所有 XWPFParagraphs
并在找到时替换文本。
对于替换过程,我更喜欢 TextSegment
替换方法。这是必要的,因为即使包含的段落被遍历,由于格式、拼写检查或任何其他奇怪的原因,文本可能会在不同的文本运行中分开。 Microsoft Word
知道将文本奇怪地分成不同文本运行的近乎无限的理由。
import java.io.*;
import org.apache.poi.xwpf.usermodel.*;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;
import org.apache.xmlbeans.XmlObject;
import org.apache.xmlbeans.XmlCursor;
import java.util.Map;
import java.util.HashMap;
import java.util.List;
import java.util.ArrayList;
/**
 * Replaces placeholder texts in the paragraphs of a Word document.
 *
 * <p>Paragraphs are collected by selecting every {@code w:p} element below the document
 * root with an XPath query rather than through {@code XWPFDocument.getParagraphs()}, so
 * paragraphs nested inside other elements are visited as well. Matching works across run
 * boundaries because a placeholder may be split over several runs.
 */
public class WordReplaceTextSegment {

    /** XPath prolog declaring the {@code w} prefix for the WordprocessingML main namespace. */
    private static final String W_NAMESPACE_DECL =
        "declare namespace w='http://schemas.openxmlformats.org/wordprocessingml/2006/main' ";

    /**
     * Searches the paragraph for the first occurrence of {@code searched}, starting at
     * {@code startPos}. The match may span several text elements and several runs.
     *
     * <p>Side effect: proofing-error elements encountered inside the scanned runs are
     * removed from the XML.
     *
     * @param paragraph the paragraph whose runs are scanned
     * @param searched  the text to look for; must not be empty
     * @param startPos  run, text-element and character index at which the scan starts
     * @return a {@link TextSegment} holding the begin and end positions (run, text element,
     *         character; the end character is inclusive) of the match, or {@code null} if
     *         the text was not found
     */
    static TextSegment searchText(XWPFParagraph paragraph, String searched, PositionInParagraph startPos) {
        int startRun = startPos.getRun(),
            startText = startPos.getText(),
            startChar = startPos.getChar();
        int beginRunPos = 0, candCharPos = 0;
        boolean newList = false;
        // Use getRuns() instead of the raw run array of the paragraph: per the original
        // author's note the raw array does not contain all runs (e.g. hyperlink runs).
        List<XWPFRun> runs = paragraph.getRuns();
        // Declared outside the run loop: a match may begin in an earlier run than it ends in.
        int beginTextPos = 0, beginCharPos = 0;
        for (int runPos = startRun; runPos < runs.size(); runPos++) {
            int textPos = 0, charPos;
            CTR ctRun = runs.get(runPos).getCTR();
            XmlCursor c = ctRun.newCursor();
            c.selectPath("./*"); // all direct children of the run
            try {
                while (c.toNextSelection()) {
                    XmlObject o = c.getObject();
                    if (o instanceof CTText) {
                        if (textPos >= startText) {
                            String candidate = ((CTText) o).getStringValue();
                            if (runPos == startRun) {
                                charPos = startChar;
                            } else {
                                charPos = 0;
                            }
                            for (; charPos < candidate.length(); charPos++) {
                                // Possible start of a match: remember where it begins.
                                if ((candidate.charAt(charPos) == searched.charAt(0)) && (candCharPos == 0)) {
                                    beginTextPos = textPos;
                                    beginCharPos = charPos;
                                    beginRunPos = runPos;
                                    newList = true;
                                }
                                if (candidate.charAt(charPos) == searched.charAt(candCharPos)) {
                                    if (candCharPos + 1 < searched.length()) {
                                        candCharPos++;
                                    } else if (newList) {
                                        // Last character matched: report the segment.
                                        TextSegment segment = new TextSegment();
                                        segment.setBeginRun(beginRunPos);
                                        segment.setBeginText(beginTextPos);
                                        segment.setBeginChar(beginCharPos);
                                        segment.setEndRun(runPos);
                                        segment.setEndText(textPos);
                                        segment.setEndChar(charPos);
                                        return segment;
                                    }
                                } else {
                                    candCharPos = 0; // mismatch: restart matching
                                }
                            }
                        }
                        textPos++;
                    } else if (o instanceof CTProofErr) {
                        c.removeXml(); // drop proofing-error markers
                    } else if (o instanceof CTRPr) {
                        // run properties do not interrupt a match
                    } else {
                        candCharPos = 0; // any other element interrupts a partial match
                    }
                }
            } finally {
                c.dispose();
            }
        }
        return null;
    }

    /**
     * Replaces every occurrence of {@code textToFind} in the paragraph with
     * {@code replacement}, including occurrences that are split over several runs.
     *
     * <p>The replacement is written into the run in which the match begins, the remainder
     * after the match stays in the run in which it ends, and all runs in between are removed.
     * The positions of each match are printed to standard output.
     *
     * @param paragraph   the paragraph to modify
     * @param textToFind  the text to replace; an empty string leaves the paragraph unchanged
     * @param replacement the text to insert instead
     * @throws IllegalArgumentException if {@code replacement} contains {@code textToFind};
     *         every search restarts at the beginning of the paragraph, so such a
     *         replacement would be found and replaced again forever
     */
    static void replaceTextSegment(XWPFParagraph paragraph, String textToFind, String replacement) {
        if (textToFind.isEmpty()) {
            return; // nothing to search for; searchText reads searched.charAt(0)
        }
        if (replacement != null && replacement.contains(textToFind)) {
            throw new IllegalArgumentException(
                "replacement must not contain the text to find: " + textToFind);
        }
        TextSegment foundTextSegment = null;
        PositionInParagraph startPos = new PositionInParagraph(0, 0, 0);
        // search all text segments having text to find
        while ((foundTextSegment = searchText(paragraph, textToFind, startPos)) != null) {
            System.out.println(foundTextSegment.getBeginRun()+":"+foundTextSegment.getBeginText()+":"+foundTextSegment.getBeginChar());
            System.out.println(foundTextSegment.getEndRun()+":"+foundTextSegment.getEndText()+":"+foundTextSegment.getEndChar());

            // maybe there is text before textToFind in begin run
            XWPFRun beginRun = paragraph.getRuns().get(foundTextSegment.getBeginRun());
            String textInBeginRun = beginRun.getText(foundTextSegment.getBeginText());
            String textBefore = textInBeginRun.substring(0, foundTextSegment.getBeginChar());

            // maybe there is text after textToFind in end run
            XWPFRun endRun = paragraph.getRuns().get(foundTextSegment.getEndRun());
            String textInEndRun = endRun.getText(foundTextSegment.getEndText());
            String textAfter = textInEndRun.substring(foundTextSegment.getEndChar() + 1);

            if (foundTextSegment.getEndRun() == foundTextSegment.getBeginRun()) {
                // only one run: text before, then the replacement, then the text after
                textInBeginRun = textBefore + replacement + textAfter;
            } else {
                // several runs: text before plus replacement in the begin run ...
                textInBeginRun = textBefore + replacement;
                // ... and the text after in the end run
                endRun.setText(textAfter, foundTextSegment.getEndText());
            }
            beginRun.setText(textInBeginRun, foundTextSegment.getBeginText());

            // runs between begin run and end run are removed, highest index first so the
            // remaining indices stay valid
            for (int runBetween = foundTextSegment.getEndRun() - 1; runBetween > foundTextSegment.getBeginRun(); runBetween--) {
                paragraph.removeRun(runBetween);
            }
        }
    }

    /**
     * Collects the XML objects of all {@code w:p} elements below the document root that
     * have at least one direct {@code w:r} child containing a {@code w:t} element.
     *
     * @param doc the document to scan
     * @return the matching paragraph XML objects in selection order; never {@code null}
     */
    static List<XmlObject> getCTPObjects(XWPFDocument doc) {
        List<XmlObject> result = new ArrayList<XmlObject>();
        // create cursor selecting all paragraph elements
        XmlCursor cursor = doc.getDocument().newCursor();
        try {
            cursor.selectPath(W_NAMESPACE_DECL + ".//*/w:p");
            while (cursor.toNextSelection()) {
                XmlObject obj = cursor.getObject();
                // add only if the paragraph contains at least a run containing text
                if (obj.selectPath(W_NAMESPACE_DECL + "./w:r/w:t").length > 0) {
                    result.add(obj);
                }
            }
        } finally {
            cursor.dispose(); // release the cursor, as searchText does
        }
        return result;
    }

    /**
     * Applies all replacements to every paragraph found by {@link #getCTPObjects}.
     *
     * <p>Each selected XML object is wrapped in an {@link XWPFParagraph} (parsed from its
     * XML text first if it is not already a {@code CTP}), the replacements are applied, and
     * the resulting paragraph XML is written back into the selected object.
     *
     * @param doc          the document to modify
     * @param replacements maps each text to find to its replacement text
     * @throws Exception if parsing a paragraph's XML fails
     */
    static void traverseAllParagraphsAndReplace(XWPFDocument doc, Map<String, String> replacements) throws Exception {
        // first get all paragraph XML objects out of the stored XML
        List<XmlObject> allCTPObjects = getCTPObjects(doc);
        // then create XWPFParagraphs from them and do the replacing
        for (XmlObject obj : allCTPObjects) {
            CTP ctp = (obj instanceof CTP) ? (CTP) obj : CTP.Factory.parse(obj.xmlText());
            XWPFParagraph paragraph = new XWPFParagraph(ctp, doc);
            for (Map.Entry<String, String> entry : replacements.entrySet()) {
                String textToFind = entry.getKey();
                if (paragraph.getText().contains(textToFind)) {
                    replaceTextSegment(paragraph, textToFind, entry.getValue());
                }
            }
            // write the (possibly re-parsed) paragraph back into the document XML
            obj.set(paragraph.getCTP());
        }
    }

    /**
     * Reads {@code source.docx}, replaces the placeholders and writes {@code result.docx}.
     *
     * @param args unused
     * @throws Exception if reading, replacing or writing fails
     */
    public static void main(String[] args) throws Exception {
        Map<String, String> replacements = new HashMap<String, String>();
        replacements.put("#name#", "Axel");
        // NOTE(review): the accompanying question spells this placeholder "#surname#";
        // confirm the spelling actually used in the document.
        replacements.put("#surename#", "Richter");

        // Nested try/finally so that streams and document are closed even on failure.
        FileInputStream in = new FileInputStream("source.docx");
        try {
            XWPFDocument doc = new XWPFDocument(in);
            try {
                traverseAllParagraphsAndReplace(doc, replacements);
                FileOutputStream out = new FileOutputStream("result.docx");
                try {
                    doc.write(out);
                } finally {
                    out.close();
                }
            } finally {
                doc.close();
            }
        } finally {
            in.close();
        }
    }
}