如何使用 XmlObject.Factory.parse 方法解析多个 <w:p> 标签

How to parse multiple <w:p> tags using the XmlObject.Factory.parse method

我想把一组 XML 标签解析成 POI header 的对象,所以我准备了如下字符串:

        // Header XML made of a single <w:p> paragraph that declares the w namespace itself.
        // It holds an empty run followed by a run with the text "Q/TX"; both runs are bold, size 72.
        String headerHomeString = "<w:p xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\">" +
                "<w:pPr>\n" +
                "<w:snapToGrid w:val=\"0\"/>\n" +
                "<w:spacing w:line=\"240\" w:lineRule=\"auto\"/>\n" +
                "<w:ind w:firstLine=\"0\" w:firstLineChars=\"0\"/>\n" +
                "<w:jc w:val=\"center\"/>\n" +
                "<w:rPr>\n" +
                "<w:b/>\n" +
                "<w:sz w:val=\"72\"/>\n" +
                "</w:rPr>\n" +
                "</w:pPr>\n" +
                "<w:r>\n" +
                "<w:rPr>\n" +
                "<w:b/>\n" +
                "<w:sz w:val=\"72\"/>\n" +
                "</w:rPr>\n" +
                "<w:t xml:space=\"preserve\"></w:t>\n" +
                "</w:r>\n" +
                "<w:r>\n" +
                "<w:rPr>\n" +
                "<w:b/>\n" +
                "<w:sz w:val=\"72\"/>\n" +
                "</w:rPr>\n" +
                "<w:t>Q/TX</w:t>\n" +
                "</w:r>\n" +
                "</w:p>\n";



我使用 CTHdrFtr 进行解析,它是一个继承自 XmlObject 的接口:

        // Create the first-page header that will receive the parsed content.
        XWPFHeader xwpfHeaderHome = xwpfDocument.createHeader(HeaderFooterType.FIRST);
        CTHdrFtr ctHdrFtrHeaderHome = null;
        try {
            // Parse the XML string into the header/footer content object.
            ctHdrFtrHeaderHome = CTHdrFtr.Factory.parse(headerHomeString);
        } catch (XmlException e) {
            // The failure is only printed; ctHdrFtrHeaderHome stays null in that case.
            e.printStackTrace();
        }
        // Hand the parsed content to the header (this is null if parsing failed above).
        xwpfHeaderHome.setHeaderFooter(ctHdrFtrHeaderHome);

它有一个不错的结果。



所以我想用这个方法在header中多加几段,所以准备了这样一个字符串:

            // Header XML with TWO sibling <w:p> paragraphs and no wrapping root element.
            // Only the first <w:p> declares xmlns:w; the second one uses the w prefix undeclared.
            String headerHomeString = "<w:p xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\">\n" +
            "<w:pPr>\n" +
            "<w:snapToGrid w:val=\"0\"/>\n" +
            "<w:ind w:firstLine=\"1446\"/>" +
            "<w:jc w:val=\"center\"/>" +
            "<w:rPr>" +
            "<w:rFonts w:hint=\"eastAsia\"/>" +
            "<w:b/>" +
            "<w:sz w:val=\"72\"/>\n" +
            "</w:rPr>\n" +
            "</w:pPr>\n" +
            "<w:r>" +
            "<w:rPr>" +
            "<w:rFonts w:hint=\"eastAsia\"/>" +
            "<w:b/>" +
            "<w:sz w:val=\"72\"/>" +
            "</w:rPr>" +
            "<w:t xml:space=\"preserve\">                </w:t>" +
            "</w:r>" +
            "<w:r>" +
            "<w:rPr>" +
            "<w:rFonts/>" +
            "<w:b/>" +
            "<w:sz w:val=\"72\"/>" +
            "</w:rPr>" +
            "<w:t>Q/T</w:t>" +
            "</w:r>" +
            "<w:r>" +
            "<w:rPr>" +
            "<w:rFonts w:hint=\"eastAsia\"/>" +
            "<w:b/>" +
            "<w:sz w:val=\"72\"/>" +
            "</w:rPr>" +
            "<w:t>X</w:t>" +
            "</w:r>\n" +
            "</w:p>\n" +
            "\n" +
            // Second paragraph starts here, after the first root element has already been closed.
            "<w:p>" +
            "<w:pPr>" +
            "<w:adjustRightInd w:val=\"0\"/>" +
            "<w:snapToGrid w:val=\"0\"/>" +
            "<w:ind w:firstLine=\"940\"/>" +
            "<w:jc w:val=\"center\"/>" +
            "<w:outlineLvl w:val=\"0\"/>" +
            "<w:rPr><w:rFonts w:hint=\"eastAsia\"/>" +
            "<w:b/>" +
            "<w:spacing w:val=\"-26\"/>" +
            "<w:sz w:val=\"52\"/>" +
            "</w:rPr>" +
            "</w:pPr>" +
            "<w:r>" +
            "<w:rPr>" +
            "<w:rFonts w:hint=\"eastAsia\" />" +
            "<w:b/>" +
            "<w:spacing w:val=\"-26\"/>" +
            "<w:sz w:val=\"52\"/>" +
            "</w:rPr><w:t>The Second Paragraph</w:t></w:r>\n" +
            "</w:p>";

这里有两个 <w:p> 标签。


我又解析了一遍:

        // Create the first-page header that will receive the parsed content.
        XWPFHeader xwpfHeaderHome = xwpfDocument.createHeader(HeaderFooterType.FIRST);
        CTHdrFtr ctHdrFtrHeaderHome = null;
        try {
            // Parse the two-paragraph XML string; this is the call reported in the stack trace.
            ctHdrFtrHeaderHome = CTHdrFtr.Factory.parse(headerHomeString);
        } catch (XmlException e) {
            // The failure is only printed; ctHdrFtrHeaderHome stays null in that case.
            e.printStackTrace();
        }
        // Hand the parsed content to the header (this is null if parsing failed above).
        xwpfHeaderHome.setHeaderFooter(ctHdrFtrHeaderHome);

发生异常:

org.apache.xmlbeans.XmlException: error: The markings behind the root element in the document must be in the correct format.
    at org.apache.xmlbeans.impl.store.Locale$SaxLoader.load(Locale.java:3440)
    at org.apache.xmlbeans.impl.store.Locale.parse(Locale.java:708)
    at org.apache.xmlbeans.impl.store.Locale.parseToXmlObject(Locale.java:692)
    at org.apache.xmlbeans.impl.store.Locale.parseToXmlObject(Locale.java:679)
    at org.apache.xmlbeans.impl.schema.SchemaTypeLoaderBase.parse(SchemaTypeLoaderBase.java:208)
    at org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHdrFtr$Factory.parse(Unknown Source)
    at dfa.core.procesor.CoreResolver.addHeaderFooter(CoreResolver.java:305)
    at dfa.core.procesor.CoreResolver.resolve(CoreResolver.java:106)
    at OPDocumentTest.coreResolverTest(OPDocumentTest.java:194)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.junit.runners.model.FrameworkMethod.runReflectiveCall(FrameworkMethod.java:59)
    at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
    at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:56)
    at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
    at org.junit.runners.ParentRunner.evaluate(ParentRunner.java:306)
    at org.junit.runners.BlockJUnit4ClassRunner.evaluate(BlockJUnit4ClassRunner.java:100)
    at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:366)
    at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:103)
    at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:63)
    at org.junit.runners.ParentRunner.run(ParentRunner.java:331)
    at org.junit.runners.ParentRunner.schedule(ParentRunner.java:79)
    at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:329)
    at org.junit.runners.ParentRunner.access0(ParentRunner.java:66)
    at org.junit.runners.ParentRunner.evaluate(ParentRunner.java:293)
    at org.junit.runners.ParentRunner.evaluate(ParentRunner.java:306)
    at org.junit.runners.ParentRunner.run(ParentRunner.java:413)
    at org.junit.runner.JUnitCore.run(JUnitCore.java:137)
    at com.intellij.junit4.JUnit4IdeaTestRunner.startRunnerWithArgs(JUnit4IdeaTestRunner.java:68)
    at com.intellij.rt.junit.IdeaTestRunner$Repeater.startRunnerWithArgs(IdeaTestRunner.java:33)
    at com.intellij.rt.junit.JUnitStarter.prepareStreamsAndStart(JUnitStarter.java:230)
    at com.intellij.rt.junit.JUnitStarter.main(JUnitStarter.java:58)


为了解决这个问题,我尝试了以下方法:给两个 <w:p> 标签都加上 xmlns 属性

            // Same two paragraphs as the previous attempt, but BOTH <w:p> elements declare xmlns:w.
            // The string still has two top-level elements and no single wrapping root.
            String headerHomeString = "<w:p xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\">\n" +
            "<w:pPr>\n" +
            "<w:snapToGrid w:val=\"0\"/>\n" +
            "<w:ind w:firstLine=\"1446\"/>" +
            "<w:jc w:val=\"center\"/>" +
            "<w:rPr>" +
            "<w:rFonts w:hint=\"eastAsia\"/>" +
            "<w:b/>" +
            "<w:sz w:val=\"72\"/>\n" +
            "</w:rPr>\n" +
            "</w:pPr>\n" +
            "<w:r>" +
            "<w:rPr>" +
            "<w:rFonts w:hint=\"eastAsia\"/>" +
            "<w:b/>" +
            "<w:sz w:val=\"72\"/>" +
            "</w:rPr>" +
            "<w:t xml:space=\"preserve\">                </w:t>" +
            "</w:r>" +
            "<w:r>" +
            "<w:rPr>" +
            "<w:rFonts/>" +
            "<w:b/>" +
            "<w:sz w:val=\"72\"/>" +
            "</w:rPr>" +
            "<w:t>Q/T</w:t>" +
            "</w:r>" +
            "<w:r>" +
            "<w:rPr>" +
            "<w:rFonts w:hint=\"eastAsia\"/>" +
            "<w:b/>" +
            "<w:sz w:val=\"72\"/>" +
            "</w:rPr>" +
            "<w:t>X</w:t>" +
            "</w:r>\n" +
            "</w:p>\n" +
            "\n" +
            // Second paragraph, now carrying its own namespace declaration.
            "<w:p xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\">" +
            "<w:pPr>" +
            "<w:adjustRightInd w:val=\"0\"/>" +
            "<w:snapToGrid w:val=\"0\"/>" +
            "<w:ind w:firstLine=\"940\"/>" +
            "<w:jc w:val=\"center\"/>" +
            "<w:outlineLvl w:val=\"0\"/>" +
            "<w:rPr><w:rFonts w:hint=\"eastAsia\"/>" +
            "<w:b/>" +
            "<w:spacing w:val=\"-26\"/>" +
            "<w:sz w:val=\"52\"/>" +
            "</w:rPr>" +
            "</w:pPr>" +
            "<w:r>" +
            "<w:rPr>" +
            "<w:rFonts w:hint=\"eastAsia\" />" +
            "<w:b/>" +
            "<w:spacing w:val=\"-26\"/>" +
            "<w:sz w:val=\"52\"/>" +
            "</w:rPr><w:t>The Second Paragraph</w:t></w:r>\n" +
            "</w:p>";

同样的问题



所以我不知道该怎么做才能解决这个问题,谢谢你的帮助。

String headerHomeString 中的 XML 需要格式正确。否则解析将失败。要形成良好的结构,它需要一个根元素。只要只有一个w:p元素,就是单根元素。但是如果有多个 w:p 元素,则需要将它们包装在根元素中。这将是一个 w:hdr 元素。

String headerHomeString = "<w:hdr xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\">\n" +
  "<w:p>\n" +
...
  "</w:p>\n" +
  "<w:p>" +
...
  "</w:p>\n" +
"</w:hdr>";

但是所有 XmlObject.Factory.parse 方法都期望解析的是构成该对象的 XML,也就是对应 XML 元素的内容(内部 XML)。因此 String headerHomeString 中的 XML 无法直接解析为 CTHdrFtr,因为 w:hdr 元素不是 CTHdrFtr 的内容元素。这正是那些特殊的 *Document 对象的用途:它们为所需的元素提供 getter 方法。

对于 CTHdrFtr,对应的是 HdrDocument。它可以从一个完整的 header 的 XML 解析出来,然后通过 getHdr() 得到 CTHdrFtr。

完整示例:

import java.io.FileOutputStream;

import org.apache.poi.xwpf.usermodel.*;
import org.apache.poi.wp.usermodel.HeaderFooterType;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;

/**
 * Creates a Word document whose first-page header is built from a raw XML string.
 *
 * <p>The header XML contains several {@code w:p} paragraphs, so they are wrapped in a single
 * {@code w:hdr} root element. That XML is parsed via {@code HdrDocument}, and its
 * {@code getHdr()} result is set as the content of the header.
 */
public class CreateWordHeaderFromXMLString {

    /** Complete header XML: one {@code w:hdr} root (declaring the w namespace) with two paragraphs. */
    static final String headerHomeString = "<w:hdr xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\">\n" +
            "<w:p>\n" +
            "<w:pPr>\n" +
            "<w:snapToGrid w:val=\"0\"/>\n" +
            "<w:ind w:firstLine=\"1446\"/>" +
            "<w:jc w:val=\"center\"/>" +
            "<w:rPr>" +
            "<w:rFonts w:hint=\"eastAsia\"/>" +
            "<w:b/>" +
            "<w:sz w:val=\"72\"/>\n" +
            "</w:rPr>\n" +
            "</w:pPr>\n" +
            "<w:r>" +
            "<w:rPr>" +
            "<w:rFonts w:hint=\"eastAsia\"/>" +
            "<w:b/>" +
            "<w:sz w:val=\"72\"/>" +
            "</w:rPr>" +
            "<w:t xml:space=\"preserve\">                </w:t>" +
            "</w:r>" +
            "<w:r>" +
            "<w:rPr>" +
            "<w:rFonts/>" +
            "<w:b/>" +
            "<w:sz w:val=\"72\"/>" +
            "</w:rPr>" +
            "<w:t>Q/T</w:t>" +
            "</w:r>" +
            "<w:r>" +
            "<w:rPr>" +
            "<w:rFonts w:hint=\"eastAsia\"/>" +
            "<w:b/>" +
            "<w:sz w:val=\"72\"/>" +
            "</w:rPr>" +
            "<w:t>X</w:t>" +
            "</w:r>\n" +
            "</w:p>\n" +
            "\n" +
            "<w:p>" +
            "<w:pPr>" +
            "<w:adjustRightInd w:val=\"0\"/>" +
            "<w:snapToGrid w:val=\"0\"/>" +
            "<w:ind w:firstLine=\"940\"/>" +
            "<w:jc w:val=\"center\"/>" +
            "<w:outlineLvl w:val=\"0\"/>" +
            "<w:rPr><w:rFonts w:hint=\"eastAsia\"/>" +
            "<w:b/>" +
            "<w:spacing w:val=\"-26\"/>" +
            "<w:sz w:val=\"52\"/>" +
            "</w:rPr>" +
            "</w:pPr>" +
            "<w:r>" +
            "<w:rPr>" +
            "<w:rFonts w:hint=\"eastAsia\" />" +
            "<w:b/>" +
            "<w:spacing w:val=\"-26\"/>" +
            "<w:sz w:val=\"52\"/>" +
            "</w:rPr><w:t>The Second Paragraph</w:t></w:r>\n" +
            "</w:p>\n" +
            "</w:hdr>";

    /**
     * Builds the document, fills its first-page header from {@link #headerHomeString} and writes
     * the result to {@code ./WordDocument.docx}.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the header XML cannot be parsed or the file cannot be written
     */
    public static void main(String[] args) throws Exception {

        // try-with-resources: the document is closed even when parsing or writing throws.
        try (XWPFDocument xwpfDocument = new XWPFDocument()) {

            XWPFHeader xwpfHeaderHome = xwpfDocument.createHeader(HeaderFooterType.FIRST);

            // Parse the whole w:hdr document, then take its content element.
            HdrDocument hdrDocument = HdrDocument.Factory.parse(headerHomeString);
            CTHdrFtr ctHdrFtrHeaderHome = hdrDocument.getHdr();

            xwpfHeaderHome.setHeaderFooter(ctHdrFtrHeaderHome);

            // Open the output file only after parsing succeeded, so a parse failure leaves no
            // empty file behind; the stream is closed before the document, as before.
            try (FileOutputStream out = new FileOutputStream("./WordDocument.docx")) {
                xwpfDocument.write(out);
            }
        }

    }
}