使用 SAX 解析器拆分 XML
Split XML using SAX parser
我有以下 xml 文件。
<Engineers>
<Engineer>
<Name>JOHN</Name>
<Position>STL</Position>
<Team>SS</Team>
</Engineer>
<Engineer>
<Name>UDAY</Name>
<Position>TL</Position>
<Team>SG</Team>
</Engineer>
<Engineer>
<Name>INDRA</Name>
<Position>Director</Position>
<Team>PP</Team>
</Engineer>
</Engineers>
当 XPath 指定为 Engineers/Engineer 时,我需要将这个 xml 拆分成更小的 xml 字符串。
更小的xml字符串如下
<Engineers>
<Engineer>
<Name>INDRA</Name>
<Position>Director</Position>
<Team>PP</Team>
</Engineer>
</Engineers>
<Engineers>
<Engineer>
<Name>JOHN</Name>
<Position>STL</Position>
<Team>SS</Team>
</Engineer>
</Engineers>
到目前为止,我已经使用 SAX 实现了以下内容,可以获取 XML 中的元素,但这不是我想要的结果。我该如何继续?
/**
 * Dumps the SAX events of an XML file to standard output.
 *
 * <p>One line is printed for every element start, element end and
 * character chunk reported by the parser. Nothing is split or written
 * to disk; this is only an event trace.
 */
public class ReadSAX
{
    /** File parsed when no path is given on the command line. */
    private static final String DEFAULT_PATH = "c:\\file.xml";

    /**
     * Parses an XML file and prints its SAX events.
     *
     * @param args optional; when {@code args[0]} is present it is used as
     *             the path of the XML file, otherwise {@code c:\file.xml}
     *             is parsed
     */
    public static void main( String[] args )
    {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
        try {
            SAXParserFactory factory = SAXParserFactory.newInstance();
            SAXParser saxParser = factory.newSAXParser();

            DefaultHandler handler = new DefaultHandler() {
                @Override
                public void startElement(String uri, String localName,
                        String qName, Attributes attributes)
                        throws SAXException {
                    System.out.println("Start Element :" + qName);
                }

                @Override
                public void endElement(String uri, String localName,
                        String qName)
                        throws SAXException {
                    System.out.println("End Element :" + qName);
                }

                @Override
                public void characters(char ch[], int start, int length)
                        throws SAXException {
                    // The parser may deliver text in several chunks and also
                    // reports whitespace between elements; each chunk is
                    // printed as received.
                    System.out.println(new String(ch, start, length));
                }
            };

            File file = new File(path);
            // try-with-resources closes the reader and the stream even when
            // parsing fails.
            try (InputStream inputStream = new FileInputStream(file);
                 Reader reader = new InputStreamReader(inputStream, "UTF-8")) {
                InputSource is = new InputSource(reader);
                is.setEncoding("UTF-8");
                saxParser.parse(is, handler);
            }
        } catch (Exception e) {
            // Top-level boundary of a command-line tool: report and exit.
            e.printStackTrace();
        }
    }
}
为什么要使用这种低级编码方法?
在 XSLT 2.0 中它只是
<!-- Writes one output document per Engineer element, named 1.xml, 2.xml, ...
     by its position, each wrapped in its own Engineers root.
     xsl:result-document takes the target URI in "href" (an attribute value
     template); "select" is not an attribute of this instruction. -->
<xsl:template match="/">
  <xsl:for-each select="Engineers/Engineer">
    <xsl:result-document href="{position()}.xml">
      <Engineers>
        <xsl:copy-of select="."/>
      </Engineers>
    </xsl:result-document>
  </xsl:for-each>
</xsl:template>
如果这会占用太多内存,请使用流式 XSLT 3.0 处理器来解决问题。
我认为您需要做的是使用 VTD-XML 的剪切和粘贴功能。这篇题为《XML 处理中 Java API 的性能分析》的论文会告诉你更多关于 VTD-XML 的信息:
http://sdiwc.us/digitlib/journal_paper.php?paper=00000582.pdf
import com.ximpleware.*;
import java.io.*;
public class splitXML {
public static void main(String[] args) throws VTDException, IOException {
VTDGen vg = new VTDGen();
if (!vg.parseFile("d:\xml\input.xml", false)){
System.out.println("error");
return;
}
VTDNav vn = vg.getNav();
AutoPilot ap = new AutoPilot(vn);
ap.selectXPath("/engineers/engineer");
int i=0,n=0;
FileOutputStream fos =null;
byte[] stag="<engineers>".getBytes();
byte[] etag="</engineers>".getBytes();
while((i=ap.evalXPath())!=-1){
fos.write(stag);
fos = new FileOutputStream("d:\xml\output"+(++n)+".xml");
long l = vn.getElementFragment();
fos.write(vn.getXML().getBytes(), (int)l, (int)(l>>32));
fos.write(etag);
fos.close();
}
}
}
我有以下 xml 文件。
<Engineers>
<Engineer>
<Name>JOHN</Name>
<Position>STL</Position>
<Team>SS</Team>
</Engineer>
<Engineer>
<Name>UDAY</Name>
<Position>TL</Position>
<Team>SG</Team>
</Engineer>
<Engineer>
<Name>INDRA</Name>
<Position>Director</Position>
<Team>PP</Team>
</Engineer>
</Engineers>
当 XPath 指定为 Engineers/Engineer 时,我需要将这个 xml 拆分成更小的 xml 字符串。
更小的xml字符串如下
<Engineers>
<Engineer>
<Name>INDRA</Name>
<Position>Director</Position>
<Team>PP</Team>
</Engineer>
</Engineers>
<Engineers>
<Engineer>
<Name>JOHN</Name>
<Position>STL</Position>
<Team>SS</Team>
</Engineer>
</Engineers>
到目前为止,我已经使用 SAX 实现了以下内容,可以获取 XML 中的元素,但这不是我想要的结果。我该如何继续?
/**
 * Dumps the SAX events of an XML file to standard output.
 *
 * <p>One line is printed for every element start, element end and
 * character chunk reported by the parser. Nothing is split or written
 * to disk; this is only an event trace.
 */
public class ReadSAX
{
    /** File parsed when no path is given on the command line. */
    private static final String DEFAULT_PATH = "c:\\file.xml";

    /**
     * Parses an XML file and prints its SAX events.
     *
     * @param args optional; when {@code args[0]} is present it is used as
     *             the path of the XML file, otherwise {@code c:\file.xml}
     *             is parsed
     */
    public static void main( String[] args )
    {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
        try {
            SAXParserFactory factory = SAXParserFactory.newInstance();
            SAXParser saxParser = factory.newSAXParser();

            DefaultHandler handler = new DefaultHandler() {
                @Override
                public void startElement(String uri, String localName,
                        String qName, Attributes attributes)
                        throws SAXException {
                    System.out.println("Start Element :" + qName);
                }

                @Override
                public void endElement(String uri, String localName,
                        String qName)
                        throws SAXException {
                    System.out.println("End Element :" + qName);
                }

                @Override
                public void characters(char ch[], int start, int length)
                        throws SAXException {
                    // The parser may deliver text in several chunks and also
                    // reports whitespace between elements; each chunk is
                    // printed as received.
                    System.out.println(new String(ch, start, length));
                }
            };

            File file = new File(path);
            // try-with-resources closes the reader and the stream even when
            // parsing fails.
            try (InputStream inputStream = new FileInputStream(file);
                 Reader reader = new InputStreamReader(inputStream, "UTF-8")) {
                InputSource is = new InputSource(reader);
                is.setEncoding("UTF-8");
                saxParser.parse(is, handler);
            }
        } catch (Exception e) {
            // Top-level boundary of a command-line tool: report and exit.
            e.printStackTrace();
        }
    }
}
为什么要使用这种低级编码方法?
在 XSLT 2.0 中它只是
<!-- Writes one output document per Engineer element, named 1.xml, 2.xml, ...
     by its position, each wrapped in its own Engineers root.
     xsl:result-document takes the target URI in "href" (an attribute value
     template); "select" is not an attribute of this instruction. -->
<xsl:template match="/">
  <xsl:for-each select="Engineers/Engineer">
    <xsl:result-document href="{position()}.xml">
      <Engineers>
        <xsl:copy-of select="."/>
      </Engineers>
    </xsl:result-document>
  </xsl:for-each>
</xsl:template>
如果这会占用太多内存,请使用流式 XSLT 3.0 处理器来解决问题。
我认为您需要做的是使用 VTD-XML 的剪切和粘贴功能。这篇题为《XML 处理中 Java API 的性能分析》的论文会告诉你更多关于 VTD-XML 的信息:
http://sdiwc.us/digitlib/journal_paper.php?paper=00000582.pdf
import com.ximpleware.*;
import java.io.*;
public class splitXML {
public static void main(String[] args) throws VTDException, IOException {
VTDGen vg = new VTDGen();
if (!vg.parseFile("d:\xml\input.xml", false)){
System.out.println("error");
return;
}
VTDNav vn = vg.getNav();
AutoPilot ap = new AutoPilot(vn);
ap.selectXPath("/engineers/engineer");
int i=0,n=0;
FileOutputStream fos =null;
byte[] stag="<engineers>".getBytes();
byte[] etag="</engineers>".getBytes();
while((i=ap.evalXPath())!=-1){
fos.write(stag);
fos = new FileOutputStream("d:\xml\output"+(++n)+".xml");
long l = vn.getElementFragment();
fos.write(vn.getXML().getBytes(), (int)l, (int)(l>>32));
fos.write(etag);
fos.close();
}
}
}