在 Java 中使用 SAX 解析器正确构建字符串

Building strings correctly with SAX Parser Java

我正在尝试读取结构未知的 XML 文件。例如,文件内容可能如下所示:

<S:Envelope xmlns:S="http://anamespace">envelopeStart
    <S:Body>bodyStart
        <ns2:getNextResponse xmlns:ns2="http://anothernamespace">getNextResponseStart
            <nextValue>9</nextValue>
        getNextResponseEnd</ns2:getNextResponse>
    bodyEnd</S:Body>
envelopeEnd</S:Envelope>

这是我实际使用的处理程序:

// SAX handler from the question: tries to record the text of each element in
// `values`, keyed by the element's local name, using a single shared builder.
DefaultHandler handler = new DefaultHandler() {
    // The one and only text buffer; it is replaced on every start tag.
    StringBuilder builder;
    // Collected text, keyed by the localName passed to endElement.
    Map<String, String> values = new HashMap<String, String>();
    
    @Override
    public void startElement(String uri, String localName, String qName,
                             Attributes attributes) throws SAXException {
        // Overwrites the reference to the previous builder, so whatever text had
        // been appended for the enclosing element is no longer reachable.
        builder = new StringBuilder();
    }

    @Override
    public void characters(char ch[], int start, int length) throws SAXException {
        // Appends the reported slice of `ch` to whichever builder is current.
        builder.append(new String(ch, start, length));
    }

    @Override
    public void endElement(String uti, String localName, String qName) throws SAXException {
        // Stores the current buffer under this element's local name, then empties
        // the same builder instance so it is reused for the text that follows.
        values.put(localName, builder.toString());
        builder.setLength(0);
    }
}

我面临的问题是:如果在每次解析到新标签时都实例化一个新的 builder,那么此前已经读取到的、属于外层标签的起始文本就会全部丢失(假设 characters 方法在一次调用中就返回了全部字符):

new Builder for the Envelope tag
reading characters: envelopeStart
new Builder for the Body tag
reading characters: bodyStart
...
new Builder for the nextValue tag <- this is the last reference to the builder that I have to use from now on
reading characters: 9
endElement: saving to Map ('nextValue', '9') and resetting length of the last builder instantiated 
reading characters: getNextResponseEnd
endElement: saving to Map ('getNextResponse', 'getNextResponseEnd') and resetting length of the last builder instantiated
...

在这种情况下,values HashMap 将具有这些值:

nextValue=9
getNextResponse=getNextResponseEnd (missing getNextResponseStart)
Body=bodyEnd (missing bodyStart)
Envelope=envelopeEnd (missing envelopeStart)

有没有办法在 Map 中同时保存每个标签的起始文本和结束文本?

只需维护一个 StringBuilder 栈(stack)即可:

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.HashMap;
import java.util.Map;
import java.util.Stack;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Demonstrates how to collect the text of every element in an XML document
 * with SAX when elements contain both text and child elements.
 *
 * <p>One {@link StringBuilder} is kept per open element on a stack, so text
 * that appears before and after a child element ends up in the same buffer.
 */
public class Example {
    /** Parser feature that makes the parser reject any DOCTYPE declaration. */
    private static final String DISALLOW_DOCTYPE_FEATURE =
            "http://apache.org/xml/features/disallow-doctype-decl";

    /**
     * Parses the sample SOAP-like document from the question.
     *
     * <p>Nothing is printed; the collected values are discarded. Use
     * {@link #collectElementText(String)} to obtain them.
     *
     * @param args ignored
     * @throws ParserConfigurationException if a SAX parser cannot be created
     * @throws SAXException if the document is not well-formed
     * @throws IOException if reading the document fails
     */
    public static void main(String... args) throws ParserConfigurationException, SAXException, IOException {
        String xml = "<S:Envelope xmlns:S=\"http://anamespace\">envelopeStart\n" +
                     "    <S:Body>bodyStart\n" +
                     "        <ns2:getNextResponse xmlns:ns2=\"http://anothernamespace\">getNextResponseStart\n" +
                     "            <nextValue>9</nextValue>\n" +
                     "        getNextResponseEnd</ns2:getNextResponse>\n" +
                     "    bodyEnd</S:Body>\n" +
                     "envelopeEnd</S:Envelope>";
        collectElementText(xml);
    }

    /**
     * Parses {@code xml} and returns the direct text content of each element,
     * keyed by the element's namespace-local name.
     *
     * <p>Text of child elements is not included in the parent's value. If the
     * same local name occurs more than once, the element that closes last wins.
     * Whitespace between tags is kept exactly as the parser reports it.
     *
     * @param xml the XML document to parse
     * @return a mutable map from local element name to that element's own text
     * @throws ParserConfigurationException if a SAX parser cannot be created
     * @throws SAXException if the document is not well-formed or contains a DOCTYPE
     * @throws IOException if reading the document fails
     */
    public static Map<String, String> collectElementText(String xml)
            throws ParserConfigurationException, SAXException, IOException {
        ElementTextCollector collector = new ElementTextCollector();

        SAXParserFactory factory = SAXParserFactory.newInstance();
        // Needed so that localName is populated ("Body" rather than "S:Body").
        factory.setNamespaceAware(true);
        // The input is caller-supplied: refuse DTDs so external entities (XXE)
        // and entity-expansion attacks are impossible.
        factory.setFeature(DISALLOW_DOCTYPE_FEATURE, true);

        SAXParser saxParser = factory.newSAXParser();
        XMLReader xmlReader = saxParser.getXMLReader();
        xmlReader.setContentHandler(collector);
        xmlReader.parse(new InputSource(new StringReader(xml)));
        return collector.values;
    }

    /** SAX handler that keeps one text buffer per currently open element. */
    private static final class ElementTextCollector extends DefaultHandler {
        private final Map<String, String> values = new HashMap<String, String>();
        /** Buffers of the open elements; the head belongs to the innermost one. */
        private final Deque<StringBuilder> builders = new ArrayDeque<StringBuilder>();

        @Override
        public void startElement(String uri, String localName, String qName,
                                 Attributes attributes) throws SAXException {
            builders.push(new StringBuilder());
        }

        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            // May be called several times per text node; always append.
            StringBuilder current = builders.peek();
            if (current != null) {
                current.append(ch, start, length);
            }
        }

        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            // Popping re-exposes the parent's buffer, so its trailing text is
            // appended to what was read before this child started.
            values.put(localName, builders.pop().toString());
        }
    }
}