使用 Java 拆分更大的 XML 文件(保留父项的属性和兄弟项)

Splitting a larger size XML file using Java (Retaining Parent's attributes and Siblings)

考虑 XML 文件,Report.xml :

<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
   <Report FileName="abc.bin" reportDate="05/12/2016 02:44:22 AM">
      <Statistics>
        <child value="abc">
         <subchild>...</subchild>
        </child>
        <child value="xyz">
         <subchild>...</subchild>
        </child>
      </Statistics>
      <Properties>
        <child1>...</child1>
        <child2>...</child2>
        .
        .
        .
        <childn>...</childn>
      </Properties>
      <OverallStatistics>
        <child1>...</child1>
        <child2>...</child2>
        .
        .
        .
        <childn>...</childn>
      </OverallStatistics>
  </Report>

我只想将上面的 XML 文件拆分为:

ReportSplit1.xml

<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
 <Report FileName="abc.bin" reportDate="05/12/2016 02:44:22 AM">
   <Statistics>
      <child value="abc">
         <subchild>...</subchild>
      </child>
   </Statistics>
   <Properties>
        <child1>...</child1>
        <child2>...</child2>
        .
        .
        .
        <childn>...</childn>
   </Properties>
   <OverallStatistics>
        <child1>...</child1>
        <child2>...</child2>
        .
        .
        .
        <childn>...</childn>
    </OverallStatistics>
</Report>

ReportSplit2.xml

<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
 <Report FileName="abc.bin" reportDate="05/12/2016 02:44:22 AM">
   <Statistics>
      <child value="xyz">
         <subchild>...</subchild>
      </child>
   </Statistics>
   <Properties>
        <child1>...</child1>
        <child2>...</child2>
        .
        .
        .
        <childn>...</childn>
   </Properties>
   <OverallStatistics>
        <child1>...</child1>
        <child2>...</child2>
        .
        .
        .
        <childn>...</childn>
    </OverallStatistics>
</Report>

即保留父节点的属性并保留兄弟节点。应该只对 Statistics 节点中的子节点进行拆分。

我按照 link 中给出的解决方法,将代码段修改如下:
package xmlsplitting;
import java.io.*;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.*;
import org.w3c.dom.*;
import org.xml.sax.*;
import javax.xml.transform.*; 
import javax.xml.transform.dom.DOMSource; 
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.*;
/**
 * Writes every {@code /Report/Statistics/child} element of {@code Report.xml}
 * to its own file, {@code ReportSplit1.xml}, {@code ReportSplit2.xml}, ...
 *
 * <p>Only the {@code child} subtree itself is serialized; the enclosing
 * {@code Report}/{@code Statistics} elements and their siblings are not part
 * of the output.
 */
public class XmlSplit
{
    /** Folder that holds the input report and receives the split files. */
    private static final String BASE_DIR = "D:\\Analyzer\\FileSplit\\";

    /**
     * Entry point: parses the report, selects the {@code child} elements with
     * XPath and serializes each one to a separate file.
     *
     * @param arg command-line arguments (unused)
     * @throws Exception if parsing, XPath evaluation, transformation or file I/O fails
     */
    static public void main(String[] arg) throws Exception
    {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = factory.newDocumentBuilder();
        // parse(String) interprets its argument as a URI, so hand over a File
        // to have the Windows path resolved correctly.
        Document doc = builder.parse(new File(BASE_DIR + "Report.xml"));

        TransformerFactory tranFactory = TransformerFactory.newInstance();
        Transformer aTransformer = tranFactory.newTransformer();

        XPath xpath = XPathFactory.newInstance().newXPath();
        NodeList list = (NodeList) xpath.evaluate("//Report/Statistics/child", doc, XPathConstants.NODESET);

        // NodeList indices are 0-based; file names are numbered from 1.
        for (int i = 0; i < list.getLength(); i++)
        {
            Node element = list.item(i).cloneNode(true);
            if (element.hasChildNodes())
            {
                Source src = new DOMSource(element);
                // try-with-resources closes the stream even when transform() throws.
                try (FileOutputStream fs = new FileOutputStream(BASE_DIR + "ReportSplit" + (i + 1) + ".xml"))
                {
                    Result dest = new StreamResult(fs);
                    aTransformer.transform(src, dest);
                }
            }
        }
    }
}

上述代码实际得到的 XML 文件拆分结果为:

ReportSplit1.xml

  <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
   <child value="abc">
      <subchild>...</subchild>
   </child>

ReportSplit2.xml

  <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
   <child value="xyz">
      <subchild>...</subchild>
   </child>

谁能提供解决方法来实现所需的 XML 文件拆分?

您的 XPath 表达式只选取了 child 节点及其下层内容。您需要为其他部分(即 Statistics、Properties 等)另外编写表达式,因为我猜您也希望把它们包含在拆分结果中。

考虑使用 XSLT(一种专门用于转换 XML 文档的声明式语言)而不是单独使用 XPath,因为您需要对整个文档进行转换。就您的需求而言,可以在 Java 中遍历各个取值,每次运行一段内嵌的、动态生成的 XSLT,从而输出多个 XML 文件:

XSLT 脚本(它会被嵌入到下面的 Java 代码中;此处示例使用 'abc',实际运行时该值会在循环中被逐个替换)

<xsl:transform xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<!-- Serialize the result as indented, UTF-8 encoded XML. -->
<xsl:output version="1.0" encoding="UTF-8" indent="yes" />
<!-- Discard whitespace-only text nodes of all source elements so that
     removed elements do not leave blank lines behind. -->
<xsl:strip-space elements="*"/>

  <!-- Identity Transform: copies every attribute and node as-is unless a
       more specific template below matches it. -->
  <xsl:template match="@*|node()">
    <xsl:copy>
      <xsl:apply-templates select="@*|node()"/>
    </xsl:copy>
  </xsl:template>

  <!-- Empty template: any element named "child" whose value attribute is not
       'abc' (or is absent) produces no output, i.e. it is removed. -->
  <xsl:template match="child[not(@value='abc')]"/>

</xsl:transform>

Java 脚本

import javax.xml.parsers.*;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import javax.xml.transform.OutputKeys;

import java.io.*;
import java.net.URISyntaxException;

import org.w3c.dom.Document;
import org.xml.sax.SAXException;
/**
 * Splits an XML report into one output file per {@code child/@value}.
 *
 * <p>For each value an XSLT 1.0 stylesheet is generated that copies the whole
 * document (identity transform) but drops every {@code child} element whose
 * {@code value} attribute differs from that value.
 */
public class XmlSplit {
    /**
     * Entry point: parses the source document once and runs one generated
     * stylesheet per configured value, writing {@code output_<value>.xml}.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the input cannot be read
     * @throws URISyntaxException declared for interface compatibility; not thrown by this code
     * @throws SAXException if the input is not well-formed XML
     * @throws ParserConfigurationException if no DOM parser can be created
     * @throws TransformerException if a stylesheet cannot be compiled or applied
     */
    public static void main(String[] args) throws IOException, URISyntaxException,
                                                  SAXException, ParserConfigurationException,
                                                  TransformerException {

        // Load XML Source
        String inputXML = "/path/to/XMLSource.xml";

        // Declare XML Values Array
        String[] xmlVals = {"abc", "xyz"};

        // The source document does not depend on the value being processed,
        // so parse it once and reuse the same DOM for every transformation.
        DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
        Document doc = docBuilder.parse(new File(inputXML));
        DOMSource source = new DOMSource(doc);

        // The factory is likewise loop-invariant; only the compiled stylesheet changes.
        TransformerFactory transformerFactory = TransformerFactory.newInstance();

        // Iterate through Values running dynamic, embedded XSLT
        for (String s : xmlVals) {
            String outputXML = "/path/to/output_" + s + ".xml";

            Source xslt = new StreamSource(new StringReader(buildStylesheet(s)));
            Transformer transformer = transformerFactory.newTransformer(xslt);

            // XSLT Transformation with pretty print
            transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no");
            transformer.setOutputProperty(OutputKeys.STANDALONE, "yes");
            transformer.setOutputProperty(OutputKeys.METHOD, "xml");
            transformer.setOutputProperty(OutputKeys.INDENT, "yes");
            transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
            transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");

            // Output Result to File
            StreamResult result = new StreamResult(new File(outputXML));
            transformer.transform(source, result);
        }

    }

    /**
     * Builds the stylesheet that keeps only {@code child} elements whose
     * {@code value} attribute equals {@code value}.
     *
     * <p>The value is spliced into the match pattern because XSLT 1.0 does not
     * allow variable references in {@code match} attributes. It is inserted
     * verbatim inside single quotes, so it must not itself contain a single
     * quote or XML markup characters.
     */
    private static String buildStylesheet(String value) {
        return String.join("\n",
            "<xsl:transform xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\" version=\"1.0\">",
            "<xsl:output version=\"1.0\" encoding=\"UTF-8\" indent=\"yes\" />",
            "<xsl:strip-space elements=\"*\"/>",
            "<xsl:template match=\"@*|node()\">",
            "<xsl:copy>",
            "<xsl:apply-templates select=\"@*|node()\"/>",
            "</xsl:copy>",
            "</xsl:template>",
            "<xsl:template match=\"child[not(@value='" + value + "')]\"/>",
            "</xsl:transform>");
    }
}