使用 Java 拆分更大的 XML 文件(保留父项的属性和兄弟项)
Splitting a larger size XML file using Java (Retaining Parent's attributes and Siblings)
考虑 XML 文件,Report.xml :
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Report FileName="abc.bin" reportDate="05/12/2016 02:44:22 AM">
<Statistics>
<child value="abc">
<subchild>...</subchild>
</child>
<child value="xyz">
<subchild>...</subchild>
</child>
</Statistics>
<Properties>
<child1>...</child1>
<child2>...</child2>
.
.
.
<childn>...</childn>
</Properties>
<OverallStatistics>
<child1>...</child1>
<child2>...</child2>
.
.
.
<childn>...</childn>
</OverallStatistics>
</Report>
我只想将上面的 XML 文件拆分为:
ReportSplit1.xml
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Report FileName="abc.bin" reportDate="05/12/2016 02:44:22 AM">
<Statistics>
<child value="abc">
<subchild>...</subchild>
</child>
</Statistics>
<Properties>
<child1>...</child1>
<child2>...</child2>
.
.
.
<childn>...</childn>
</Properties>
<OverallStatistics>
<child1>...</child1>
<child2>...</child2>
.
.
.
<childn>...</childn>
</OverallStatistics>
</Report>
ReportSplit2.xml
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Report FileName="abc.bin" reportDate="05/12/2016 02:44:22 AM">
<Statistics>
<child value="xyz">
<subchild>...</subchild>
</child>
</Statistics>
<Properties>
<child1>...</child1>
<child2>...</child2>
.
.
.
<childn>...</childn>
</Properties>
<OverallStatistics>
<child1>...</child1>
<child2>...</child2>
.
.
.
<childn>...</childn>
</OverallStatistics>
</Report>
即保留父节点的属性并保留兄弟节点。应该只对 Statistics 节点中的子节点进行拆分。
我按照链接(link)中给出的解决方法,
将代码段修改为:
package xmlsplitting;
import java.io.*;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.*;
import org.w3c.dom.*;
import org.xml.sax.*;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.*;
public class XmlSplit
{
    /** Report that is split when no input path is passed on the command line. */
    private static final String DEFAULT_INPUT = "D:\\Analyzer\\FileSplit\\Report.xml";

    /** Path prefix of the generated files when none is passed on the command line. */
    private static final String DEFAULT_OUTPUT_PREFIX = "D:\\Analyzer\\FileSplit\\ReportSplit";

    /**
     * Writes each non-empty {@code Report/Statistics/child} element of the report
     * to its own file, named {@code <prefix><n>.xml} with {@code n} counting from 1
     * in document order.
     *
     * <p>Only the {@code child} subtree itself is serialized; the enclosing
     * {@code Report} element and the siblings of {@code Statistics} are not part
     * of the output.
     *
     * @param arg optional arguments: {@code arg[0]} is the path of the report to
     *            split, {@code arg[1]} is the output path prefix; when absent the
     *            built-in default locations are used
     * @throws Exception if the report cannot be parsed or an output file cannot
     *                   be written
     */
    static public void main(String[] arg) throws Exception
    {
        String inputPath = (arg != null && arg.length > 0) ? arg[0] : DEFAULT_INPUT;
        String outputPrefix = (arg != null && arg.length > 1) ? arg[1] : DEFAULT_OUTPUT_PREFIX;

        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = factory.newDocumentBuilder();
        // parse(String) expects a URI; going through File handles plain file-system paths.
        Document doc = builder.parse(new File(inputPath));

        // A transformer created without a stylesheet copies its source to the result unchanged.
        TransformerFactory tranFactory = TransformerFactory.newInstance();
        Transformer aTransformer = tranFactory.newTransformer();

        XPath xpath = XPathFactory.newInstance().newXPath();
        NodeList list = (NodeList) xpath.evaluate("//Report/Statistics/child", doc, XPathConstants.NODESET);

        // Start at 0 so the first child is not skipped; files are numbered from 1.
        for (int i = 0; i < list.getLength(); i++)
        {
            Node element = list.item(i).cloneNode(true);
            if (!element.hasChildNodes())
            {
                // Empty child elements produce no file.
                continue;
            }
            Source src = new DOMSource(element);
            // try-with-resources closes the stream even when the transformation fails.
            try (OutputStream fs = new FileOutputStream(outputPrefix + (i + 1) + ".xml"))
            {
                aTransformer.transform(src, new StreamResult(fs));
            }
        }
    }
}
实际得到的 XML 拆分结果为:
ReportSplit1.xml
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<child value="abc">
<subchild>...</subchild>
</child>
ReportSplit2.xml
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<child value="xyz">
<subchild>...</subchild>
</child>
谁能提供解决方法来实现所需的 XML 文件拆分?
您的 XPath 表达式只选取了 child 节点及其后代。
您还需要为其他部分(即 Statistics 和 Properties)编写额外的表达式,因为我猜您也希望把它们包含在拆分结果中。
考虑使用 XSLT(一种用于转换 XML 文档的声明式专用语言)而不是 XPath,因为您需要对整个文档进行转换。针对您的需求,可以在 Java 中嵌入动态生成的 XSLT,并对各个取值循环运行,从而输出多个 XML 文件:
XSLT 脚本(已嵌入下方的 Java 代码中;此示例使用 'abc',运行时会依次替换为各个取值)
<xsl:transform xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<!-- Purpose: copy the whole input document, dropping every `child` element
     whose `value` attribute is not 'abc'. -->
<!-- Serialize as indented UTF-8 XML. -->
<xsl:output version="1.0" encoding="UTF-8" indent="yes" />
<!-- Discard whitespace-only text nodes of all elements before processing. -->
<xsl:strip-space elements="*"/>
<!-- Identity Transform -->
<!-- Copies every attribute and node, recursing into attributes and children. -->
<xsl:template match="@*|node()">
<xsl:copy>
<xsl:apply-templates select="@*|node()"/>
</xsl:copy>
</xsl:template>
<!-- Empty template: matching `child` elements produce no output. A `child`
     without a `value` attribute also matches and is therefore dropped. -->
<xsl:template match="child[not(@value='abc')]"/>
</xsl:transform>
Java 代码
import javax.xml.parsers.*;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import javax.xml.transform.OutputKeys;
import java.io.*;
import java.net.URISyntaxException;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;
public class XmlSplit {

    /**
     * Stylesheet that copies the whole document but keeps only those
     * {@code Statistics/child} elements whose {@code value} attribute equals the
     * {@code keep} parameter.
     *
     * <p>The comparison lives in an {@code xsl:if} because XSLT 1.0 does not allow
     * variable references inside a {@code match} pattern. Passing the value as a
     * parameter (instead of pasting it into the stylesheet text) keeps the
     * stylesheet well-formed for values containing quotes or markup characters.
     */
    private static final String SPLIT_XSLT = String.join("\n",
            "<xsl:transform xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\" version=\"1.0\">",
            "<xsl:output version=\"1.0\" encoding=\"UTF-8\" indent=\"yes\" />",
            "<xsl:strip-space elements=\"*\"/>",
            "<xsl:param name=\"keep\"/>",
            "<xsl:template match=\"@*|node()\">",
            "<xsl:copy>",
            "<xsl:apply-templates select=\"@*|node()\"/>",
            "</xsl:copy>",
            "</xsl:template>",
            "<xsl:template match=\"Statistics/child\">",
            "<xsl:if test=\"@value = $keep\">",
            "<xsl:copy>",
            "<xsl:apply-templates select=\"@*|node()\"/>",
            "</xsl:copy>",
            "</xsl:if>",
            "</xsl:template>",
            "</xsl:transform>");

    /**
     * Splits an XML report into one file per value: each output is a full copy of
     * the input in which {@code Statistics} keeps only the {@code child} elements
     * whose {@code value} attribute equals that value. Everything else (root
     * attributes, sibling sections) is copied unchanged.
     *
     * @param args optional arguments: {@code args[0]} is the input XML path,
     *             {@code args[1]} is the output path prefix (the value and
     *             {@code .xml} are appended), {@code args[2..]} are the values to
     *             split on; missing arguments fall back to the built-in defaults
     * @throws IOException if the input file cannot be read
     * @throws SAXException if the input is not well-formed XML
     * @throws ParserConfigurationException if no DOM parser can be created
     * @throws TransformerException if the stylesheet cannot be compiled or a
     *                              transformation fails
     * @throws URISyntaxException never thrown by this implementation; kept in the
     *                            signature for compatibility
     */
    public static void main(String[] args) throws IOException, URISyntaxException,
            SAXException, ParserConfigurationException,
            TransformerException {
        int argCount = (args == null) ? 0 : args.length;

        // Load XML Source
        String inputXML = argCount > 0 ? args[0] : "/path/to/XMLSource.xml";
        String outputPrefix = argCount > 1 ? args[1] : "/path/to/output_";

        // Declare XML Values Array
        String[] xmlVals = argCount > 2
                ? java.util.Arrays.copyOfRange(args, 2, argCount)
                : new String[] {"abc", "xyz"};

        // The input does not change between iterations, so parse it once.
        DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
        Document doc = docBuilder.parse(new File(inputXML));

        // Compile the stylesheet once; each value only needs a fresh Transformer.
        TransformerFactory transformerFactory = TransformerFactory.newInstance();
        Templates templates = transformerFactory.newTemplates(
                new StreamSource(new StringReader(SPLIT_XSLT)));

        // Iterate through Values, running the stylesheet with a different parameter
        for (String s : xmlVals) {
            String outputXML = outputPrefix + s + ".xml";

            // XSLT Transformation with pretty print
            Transformer transformer = templates.newTransformer();
            transformer.setParameter("keep", s);
            transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no");
            transformer.setOutputProperty(OutputKeys.STANDALONE, "yes");
            transformer.setOutputProperty(OutputKeys.METHOD, "xml");
            transformer.setOutputProperty(OutputKeys.INDENT, "yes");
            transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
            transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");

            // Output Result to File
            DOMSource source = new DOMSource(doc);
            StreamResult result = new StreamResult(new File(outputXML));
            transformer.transform(source, result);
        }
    }
}
考虑 XML 文件,Report.xml :
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Report FileName="abc.bin" reportDate="05/12/2016 02:44:22 AM">
<Statistics>
<child value="abc">
<subchild>...</subchild>
</child>
<child value="xyz">
<subchild>...</subchild>
</child>
</Statistics>
<Properties>
<child1>...</child1>
<child2>...</child2>
.
.
.
<childn>...</childn>
</Properties>
<OverallStatistics>
<child1>...</child1>
<child2>...</child2>
.
.
.
<childn>...</childn>
</OverallStatistics>
</Report>
我只想将上面的 XML 文件拆分为:
ReportSplit1.xml
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Report FileName="abc.bin" reportDate="05/12/2016 02:44:22 AM">
<Statistics>
<child value="abc">
<subchild>...</subchild>
</child>
</Statistics>
<Properties>
<child1>...</child1>
<child2>...</child2>
.
.
.
<childn>...</childn>
</Properties>
<OverallStatistics>
<child1>...</child1>
<child2>...</child2>
.
.
.
<childn>...</childn>
</OverallStatistics>
</Report>
ReportSplit2.xml
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Report FileName="abc.bin" reportDate="05/12/2016 02:44:22 AM">
<Statistics>
<child value="xyz">
<subchild>...</subchild>
</child>
</Statistics>
<Properties>
<child1>...</child1>
<child2>...</child2>
.
.
.
<childn>...</childn>
</Properties>
<OverallStatistics>
<child1>...</child1>
<child2>...</child2>
.
.
.
<childn>...</childn>
</OverallStatistics>
</Report>
即保留父节点的属性并保留兄弟节点。应该只对 Statistics 节点中的子节点进行拆分。
我按照链接(link)中给出的解决方法,将代码段修改为:
package xmlsplitting;
import java.io.*;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.*;
import org.w3c.dom.*;
import org.xml.sax.*;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.*;
public class XmlSplit
{
    /** Report that is split when no input path is passed on the command line. */
    private static final String DEFAULT_INPUT = "D:\\Analyzer\\FileSplit\\Report.xml";

    /** Path prefix of the generated files when none is passed on the command line. */
    private static final String DEFAULT_OUTPUT_PREFIX = "D:\\Analyzer\\FileSplit\\ReportSplit";

    /**
     * Writes each non-empty {@code Report/Statistics/child} element of the report
     * to its own file, named {@code <prefix><n>.xml} with {@code n} counting from 1
     * in document order.
     *
     * <p>Only the {@code child} subtree itself is serialized; the enclosing
     * {@code Report} element and the siblings of {@code Statistics} are not part
     * of the output.
     *
     * @param arg optional arguments: {@code arg[0]} is the path of the report to
     *            split, {@code arg[1]} is the output path prefix; when absent the
     *            built-in default locations are used
     * @throws Exception if the report cannot be parsed or an output file cannot
     *                   be written
     */
    static public void main(String[] arg) throws Exception
    {
        String inputPath = (arg != null && arg.length > 0) ? arg[0] : DEFAULT_INPUT;
        String outputPrefix = (arg != null && arg.length > 1) ? arg[1] : DEFAULT_OUTPUT_PREFIX;

        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = factory.newDocumentBuilder();
        // parse(String) expects a URI; going through File handles plain file-system paths.
        Document doc = builder.parse(new File(inputPath));

        // A transformer created without a stylesheet copies its source to the result unchanged.
        TransformerFactory tranFactory = TransformerFactory.newInstance();
        Transformer aTransformer = tranFactory.newTransformer();

        XPath xpath = XPathFactory.newInstance().newXPath();
        NodeList list = (NodeList) xpath.evaluate("//Report/Statistics/child", doc, XPathConstants.NODESET);

        // Start at 0 so the first child is not skipped; files are numbered from 1.
        for (int i = 0; i < list.getLength(); i++)
        {
            Node element = list.item(i).cloneNode(true);
            if (!element.hasChildNodes())
            {
                // Empty child elements produce no file.
                continue;
            }
            Source src = new DOMSource(element);
            // try-with-resources closes the stream even when the transformation fails.
            try (OutputStream fs = new FileOutputStream(outputPrefix + (i + 1) + ".xml"))
            {
                aTransformer.transform(src, new StreamResult(fs));
            }
        }
    }
}
实际得到的 XML 拆分结果为:
ReportSplit1.xml
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<child value="abc">
<subchild>...</subchild>
</child>
ReportSplit2.xml
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<child value="xyz">
<subchild>...</subchild>
</child>
谁能提供解决方法来实现所需的 XML 文件拆分?
您的 XPath 表达式只选取了 child 节点及其后代。您还需要为其他部分(即 Statistics 和 Properties)编写额外的表达式,因为我猜您也希望把它们包含在拆分结果中。
考虑使用 XSLT(一种用于转换 XML 文档的声明式专用语言)而不是 XPath,因为您需要对整个文档进行转换。针对您的需求,可以在 Java 中嵌入动态生成的 XSLT,并对各个取值循环运行,从而输出多个 XML 文件:
XSLT 脚本(已嵌入下方的 Java 代码中;此示例使用 'abc',运行时会依次替换为各个取值)
<xsl:transform xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<!-- Purpose: copy the whole input document, dropping every `child` element
     whose `value` attribute is not 'abc'. -->
<!-- Serialize as indented UTF-8 XML. -->
<xsl:output version="1.0" encoding="UTF-8" indent="yes" />
<!-- Discard whitespace-only text nodes of all elements before processing. -->
<xsl:strip-space elements="*"/>
<!-- Identity Transform -->
<!-- Copies every attribute and node, recursing into attributes and children. -->
<xsl:template match="@*|node()">
<xsl:copy>
<xsl:apply-templates select="@*|node()"/>
</xsl:copy>
</xsl:template>
<!-- Empty template: matching `child` elements produce no output. A `child`
     without a `value` attribute also matches and is therefore dropped. -->
<xsl:template match="child[not(@value='abc')]"/>
</xsl:transform>
Java 代码
import javax.xml.parsers.*;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import javax.xml.transform.OutputKeys;
import java.io.*;
import java.net.URISyntaxException;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;
public class XmlSplit {

    /**
     * Stylesheet that copies the whole document but keeps only those
     * {@code Statistics/child} elements whose {@code value} attribute equals the
     * {@code keep} parameter.
     *
     * <p>The comparison lives in an {@code xsl:if} because XSLT 1.0 does not allow
     * variable references inside a {@code match} pattern. Passing the value as a
     * parameter (instead of pasting it into the stylesheet text) keeps the
     * stylesheet well-formed for values containing quotes or markup characters.
     */
    private static final String SPLIT_XSLT = String.join("\n",
            "<xsl:transform xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\" version=\"1.0\">",
            "<xsl:output version=\"1.0\" encoding=\"UTF-8\" indent=\"yes\" />",
            "<xsl:strip-space elements=\"*\"/>",
            "<xsl:param name=\"keep\"/>",
            "<xsl:template match=\"@*|node()\">",
            "<xsl:copy>",
            "<xsl:apply-templates select=\"@*|node()\"/>",
            "</xsl:copy>",
            "</xsl:template>",
            "<xsl:template match=\"Statistics/child\">",
            "<xsl:if test=\"@value = $keep\">",
            "<xsl:copy>",
            "<xsl:apply-templates select=\"@*|node()\"/>",
            "</xsl:copy>",
            "</xsl:if>",
            "</xsl:template>",
            "</xsl:transform>");

    /**
     * Splits an XML report into one file per value: each output is a full copy of
     * the input in which {@code Statistics} keeps only the {@code child} elements
     * whose {@code value} attribute equals that value. Everything else (root
     * attributes, sibling sections) is copied unchanged.
     *
     * @param args optional arguments: {@code args[0]} is the input XML path,
     *             {@code args[1]} is the output path prefix (the value and
     *             {@code .xml} are appended), {@code args[2..]} are the values to
     *             split on; missing arguments fall back to the built-in defaults
     * @throws IOException if the input file cannot be read
     * @throws SAXException if the input is not well-formed XML
     * @throws ParserConfigurationException if no DOM parser can be created
     * @throws TransformerException if the stylesheet cannot be compiled or a
     *                              transformation fails
     * @throws URISyntaxException never thrown by this implementation; kept in the
     *                            signature for compatibility
     */
    public static void main(String[] args) throws IOException, URISyntaxException,
            SAXException, ParserConfigurationException,
            TransformerException {
        int argCount = (args == null) ? 0 : args.length;

        // Load XML Source
        String inputXML = argCount > 0 ? args[0] : "/path/to/XMLSource.xml";
        String outputPrefix = argCount > 1 ? args[1] : "/path/to/output_";

        // Declare XML Values Array
        String[] xmlVals = argCount > 2
                ? java.util.Arrays.copyOfRange(args, 2, argCount)
                : new String[] {"abc", "xyz"};

        // The input does not change between iterations, so parse it once.
        DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
        Document doc = docBuilder.parse(new File(inputXML));

        // Compile the stylesheet once; each value only needs a fresh Transformer.
        TransformerFactory transformerFactory = TransformerFactory.newInstance();
        Templates templates = transformerFactory.newTemplates(
                new StreamSource(new StringReader(SPLIT_XSLT)));

        // Iterate through Values, running the stylesheet with a different parameter
        for (String s : xmlVals) {
            String outputXML = outputPrefix + s + ".xml";

            // XSLT Transformation with pretty print
            Transformer transformer = templates.newTransformer();
            transformer.setParameter("keep", s);
            transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no");
            transformer.setOutputProperty(OutputKeys.STANDALONE, "yes");
            transformer.setOutputProperty(OutputKeys.METHOD, "xml");
            transformer.setOutputProperty(OutputKeys.INDENT, "yes");
            transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
            transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");

            // Output Result to File
            DOMSource source = new DOMSource(doc);
            StreamResult result = new StreamResult(new File(outputXML));
            transformer.transform(source, result);
        }
    }
}