在 Java 中使用 SAX 解析器正确构建字符串
Building strings correctly with SAX Parser Java
我正在尝试读取结构未知的 XML 文件。这可能是一个文件:
<S:Envelope xmlns:S="http://anamespace">envelopeStart
<S:Body>bodyStart
<ns2:getNextResponse xmlns:ns2="http://anothernamespace">getNextResponseStart
<nextValue>9</nextValue>
getNextResponseEnd</ns2:getNextResponse>
bodyEnd</S:Body>
envelopeEnd</S:Envelope>
这是我实际使用的处理程序:
// SAX handler from the question: tries to record the text content of every element
// in a map, using ONE shared StringBuilder for the whole document.
DefaultHandler handler = new DefaultHandler() {
// The single buffer currently receiving character data; replaced on every start tag.
StringBuilder builder;
// Collected text, keyed by the localName passed to endElement.
Map<String, String> values = new HashMap<String, String>();
@Override
public void startElement(String uri, String localName, String qName,
Attributes attributes) throws SAXException {
// Replaces the shared buffer, so text already appended for the enclosing
// (still open) element is no longer reachable from this handler.
builder = new StringBuilder();
}
@Override
public void characters(char ch[], int start, int length) throws SAXException {
// Appends the reported slice ch[start .. start+length) to the current buffer.
builder.append(new String(ch, start, length));
}
@Override
public void endElement(String uti, String localName, String qName) throws SAXException {
// Stores whatever is in the shared buffer under the closing element's localName.
// NOTE(review): localName is presumably non-empty only with a namespace-aware parser - confirm parser setup.
values.put(localName, builder.toString());
// Empties the same buffer instance; text that follows is then stored for the parent
// element, which therefore only keeps its trailing text.
builder.setLength(0);
}
}
我面临的问题是:如果我为每个解析到的新标签都实例化一个新的 builder,我就会丢失此前已读取的所有开头文本(假设 characters 方法在一次调用中就返回全部字符):
new Builder for the Envelope tag
reading characters: envelopeStart
new Builder for the Body tag
reading characters: bodyStart
...
new Builder for the nextValue tag <- this is the last reference to the builder that I have to use from now on
reading characters: 9
endElement: saving to Map ('nextValue', '9') and resetting length of the last builder instantiated
reading characters: getNextResponseEnd
endElement: saving to Map ('getNextResponse', 'getNextResponseEnd') and resetting length of the last builder instantiated
...
在这种情况下,values 这个 HashMap 中将包含以下值:
nextValue=9
getNextResponse=getNextResponseEnd (missing getNextResponseStart)
body=bodyEnd (missing bodyStart)
envelope=envelopeEnd (missing envelopeStart)
有没有办法在 Map 中同时保存每个标签的开头文本和结尾文本?
只需维护一个 StringBuilder 栈(stack)即可:
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import java.io.IOException;
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;
import java.util.Stack;
/**
 * Demonstrates collecting the text content of every XML element with SAX.
 *
 * <p>One {@link StringBuilder} is kept per currently open element on a stack, so text that
 * appears before and after a child element is both attributed to the parent element.
 */
public class Example {

    /** Parser feature that rejects any DOCTYPE declaration (guards against XXE / entity expansion). */
    private static final String DISALLOW_DOCTYPE_FEATURE =
            "http://apache.org/xml/features/disallow-doctype-decl";

    /**
     * Parses a fixed sample SOAP-like document. Produces no output; the parsed map is discarded.
     *
     * @param args ignored
     * @throws ParserConfigurationException if the SAX parser cannot be configured
     * @throws SAXException if the sample document cannot be parsed
     * @throws IOException if reading the in-memory document fails
     */
    public static void main(String... args) throws ParserConfigurationException, SAXException, IOException {
        String xml = "<S:Envelope xmlns:S=\"http://anamespace\">envelopeStart\n" +
                " <S:Body>bodyStart\n" +
                " <ns2:getNextResponse xmlns:ns2=\"http://anothernamespace\">getNextResponseStart\n" +
                " <nextValue>9</nextValue>\n" +
                " getNextResponseEnd</ns2:getNextResponse>\n" +
                " bodyEnd</S:Body>\n" +
                "envelopeEnd</S:Envelope>";
        parseElementText(xml);
    }

    /**
     * Parses {@code xml} and returns the direct text content of each element, keyed by the
     * element's local name.
     *
     * <p>Text of nested elements is not included in the parent's entry. Whitespace is kept
     * exactly as reported by the parser. If the same local name occurs more than once, the
     * element that closes last wins.
     *
     * @param xml the XML document text; must not be {@code null}
     * @return a mutable map from element local name to that element's own text
     * @throws ParserConfigurationException if the SAX parser cannot be configured
     * @throws SAXException if the document is malformed or contains a DOCTYPE declaration
     * @throws IOException if reading the document fails
     */
    static Map<String, String> parseElementText(String xml)
            throws ParserConfigurationException, SAXException, IOException {
        final Map<String, String> values = new HashMap<String, String>();
        DefaultHandler handler = new DefaultHandler() {
            // One buffer per open element; the top of the stack is the innermost element.
            private final Stack<StringBuilder> builders = new Stack<>();

            @Override
            public void startElement(String uri, String localName, String qName,
                    Attributes attributes) throws SAXException {
                builders.push(new StringBuilder());
            }

            @Override
            public void characters(char[] ch, int start, int length) throws SAXException {
                // May be called several times per text run; append the slice directly
                // instead of allocating an intermediate String each time.
                builders.peek().append(ch, start, length);
            }

            @Override
            public void endElement(String uri, String localName, String qName) throws SAXException {
                // The closing element's buffer is complete: record it and expose the parent's again.
                values.put(localName, builders.pop().toString());
            }
        };

        SAXParserFactory spf = SAXParserFactory.newInstance();
        // Required so that localName is populated (prefix stripped) in the callbacks.
        spf.setNamespaceAware(true);
        // Refuse DOCTYPE declarations so external entities can never be resolved.
        spf.setFeature(DISALLOW_DOCTYPE_FEATURE, true);
        SAXParser saxParser = spf.newSAXParser();
        XMLReader xmlReader = saxParser.getXMLReader();
        xmlReader.setContentHandler(handler);
        xmlReader.parse(new InputSource(new StringReader(xml)));
        return values;
    }
}
我正在尝试读取结构未知的 XML 文件。这可能是一个文件:
<S:Envelope xmlns:S="http://anamespace">envelopeStart
<S:Body>bodyStart
<ns2:getNextResponse xmlns:ns2="http://anothernamespace">getNextResponseStart
<nextValue>9</nextValue>
getNextResponseEnd</ns2:getNextResponse>
bodyEnd</S:Body>
envelopeEnd</S:Envelope>
这是我实际使用的处理程序:
// SAX handler from the question: tries to record the text content of every element
// in a map, using ONE shared StringBuilder for the whole document.
DefaultHandler handler = new DefaultHandler() {
// The single buffer currently receiving character data; replaced on every start tag.
StringBuilder builder;
// Collected text, keyed by the localName passed to endElement.
Map<String, String> values = new HashMap<String, String>();
@Override
public void startElement(String uri, String localName, String qName,
Attributes attributes) throws SAXException {
// Replaces the shared buffer, so text already appended for the enclosing
// (still open) element is no longer reachable from this handler.
builder = new StringBuilder();
}
@Override
public void characters(char ch[], int start, int length) throws SAXException {
// Appends the reported slice ch[start .. start+length) to the current buffer.
builder.append(new String(ch, start, length));
}
@Override
public void endElement(String uti, String localName, String qName) throws SAXException {
// Stores whatever is in the shared buffer under the closing element's localName.
// NOTE(review): localName is presumably non-empty only with a namespace-aware parser - confirm parser setup.
values.put(localName, builder.toString());
// Empties the same buffer instance; text that follows is then stored for the parent
// element, which therefore only keeps its trailing text.
builder.setLength(0);
}
}
我面临的问题是:如果我为每个解析到的新标签都实例化一个新的 builder,我就会丢失此前已读取的所有开头文本(假设 characters 方法在一次调用中就返回全部字符):
new Builder for the Envelope tag
reading characters: envelopeStart
new Builder for the Body tag
reading characters: bodyStart
...
new Builder for the nextValue tag <- this is the last reference to the builder that I have to use from now on
reading characters: 9
endElement: saving to Map ('nextValue', '9') and resetting length of the last builder instantiated
reading characters: getNextResponseEnd
endElement: saving to Map ('getNextResponse', 'getNextResponseEnd') and resetting length of the last builder instantiated
...
在这种情况下,values 这个 HashMap 中将包含以下值:
nextValue=9
getNextResponse=getNextResponseEnd (missing getNextResponseStart)
body=bodyEnd (missing bodyStart)
envelope=envelopeEnd (missing envelopeStart)
有没有办法在 Map 中同时保存每个标签的开头文本和结尾文本?
只需维护一个 StringBuilder 栈(stack)即可:
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import java.io.IOException;
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;
import java.util.Stack;
/**
 * Demonstrates collecting the text content of every XML element with SAX.
 *
 * <p>One {@link StringBuilder} is kept per currently open element on a stack, so text that
 * appears before and after a child element is both attributed to the parent element.
 */
public class Example {

    /** Parser feature that rejects any DOCTYPE declaration (guards against XXE / entity expansion). */
    private static final String DISALLOW_DOCTYPE_FEATURE =
            "http://apache.org/xml/features/disallow-doctype-decl";

    /**
     * Parses a fixed sample SOAP-like document. Produces no output; the parsed map is discarded.
     *
     * @param args ignored
     * @throws ParserConfigurationException if the SAX parser cannot be configured
     * @throws SAXException if the sample document cannot be parsed
     * @throws IOException if reading the in-memory document fails
     */
    public static void main(String... args) throws ParserConfigurationException, SAXException, IOException {
        String xml = "<S:Envelope xmlns:S=\"http://anamespace\">envelopeStart\n" +
                " <S:Body>bodyStart\n" +
                " <ns2:getNextResponse xmlns:ns2=\"http://anothernamespace\">getNextResponseStart\n" +
                " <nextValue>9</nextValue>\n" +
                " getNextResponseEnd</ns2:getNextResponse>\n" +
                " bodyEnd</S:Body>\n" +
                "envelopeEnd</S:Envelope>";
        parseElementText(xml);
    }

    /**
     * Parses {@code xml} and returns the direct text content of each element, keyed by the
     * element's local name.
     *
     * <p>Text of nested elements is not included in the parent's entry. Whitespace is kept
     * exactly as reported by the parser. If the same local name occurs more than once, the
     * element that closes last wins.
     *
     * @param xml the XML document text; must not be {@code null}
     * @return a mutable map from element local name to that element's own text
     * @throws ParserConfigurationException if the SAX parser cannot be configured
     * @throws SAXException if the document is malformed or contains a DOCTYPE declaration
     * @throws IOException if reading the document fails
     */
    static Map<String, String> parseElementText(String xml)
            throws ParserConfigurationException, SAXException, IOException {
        final Map<String, String> values = new HashMap<String, String>();
        DefaultHandler handler = new DefaultHandler() {
            // One buffer per open element; the top of the stack is the innermost element.
            private final Stack<StringBuilder> builders = new Stack<>();

            @Override
            public void startElement(String uri, String localName, String qName,
                    Attributes attributes) throws SAXException {
                builders.push(new StringBuilder());
            }

            @Override
            public void characters(char[] ch, int start, int length) throws SAXException {
                // May be called several times per text run; append the slice directly
                // instead of allocating an intermediate String each time.
                builders.peek().append(ch, start, length);
            }

            @Override
            public void endElement(String uri, String localName, String qName) throws SAXException {
                // The closing element's buffer is complete: record it and expose the parent's again.
                values.put(localName, builders.pop().toString());
            }
        };

        SAXParserFactory spf = SAXParserFactory.newInstance();
        // Required so that localName is populated (prefix stripped) in the callbacks.
        spf.setNamespaceAware(true);
        // Refuse DOCTYPE declarations so external entities can never be resolved.
        spf.setFeature(DISALLOW_DOCTYPE_FEATURE, true);
        SAXParser saxParser = spf.newSAXParser();
        XMLReader xmlReader = saxParser.getXMLReader();
        xmlReader.setContentHandler(handler);
        xmlReader.parse(new InputSource(new StringReader(xml)));
        return values;
    }
}