JAVA XML : 获取内容节点
JAVA XML : get content Node
我有一个这样的 XML:
<root>
<countries>
<country id="98" nom="Espagne"/>
<country id="76" nom="France"/>
</countries>
</root>
我可以用下面的代码读取根标签,并输出整个 countries 节点(包含它自身的标签):
// Parse the XML file into a DOM tree.
Document xmlDocument = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(XmlFile);

// Report the name of the document's root element.
String rootName = xmlDocument.getDocumentElement().getNodeName();
System.out.println("Root element :" + rootName);

// Take the first <countries> element and print its serialized form.
Node countriesNode = xmlDocument.getElementsByTagName("countries").item(0);
System.out.println(nodeToString(countriesNode));
/**
 * Serializes the given DOM node to a string using a default {@link Transformer},
 * with indentation requested and the XML declaration omitted.
 *
 * @param node the DOM node to serialize
 * @return the text written by the transformer for {@code node}
 * @throws Exception if the transformer cannot be created or the transformation fails
 */
private static String nodeToString(Node node) throws Exception{
    Transformer serializer = TransformerFactory.newInstance().newTransformer();
    serializer.setOutputProperty(OutputKeys.INDENT, "yes");
    serializer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");

    // Collect the transformer output in memory so it can be returned as a String.
    StringWriter buffer = new StringWriter();
    StreamResult target = new StreamResult(buffer);
    serializer.transform(new DOMSource(node), target);
    return buffer.toString();
}
但我无法像下面这样只获取 countries 标签内部的全部内容(不包含 countries 标签本身):
<country id="98" nom="Espagne"/>
<country id="76" nom="France"/>
以下示例将打印 <country id="98" nom="Espagne"/><country id="76" nom="France"/>:
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.ParserConfigurationException;
import org.xml.sax.InputSource;
import java.io.StringReader;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;
import org.w3c.dom.ls.DOMImplementationLS;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.ls.LSSerializer;
...
// Sample input: a <countries> element holding two <country> children.
String sampleXml = "<root><countries><country id=\"98\" nom=\"Espagne\"/><country id=\"76\" nom=\"France\"/></countries></root>";

// Parse the in-memory XML string into a DOM document.
DocumentBuilder parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();
Document parsed = parser.parse(new InputSource(new StringReader(sampleXml)));

// Take the first <countries> element and print the serialized form of its children.
Node countries = parsed.getElementsByTagName("countries").item(0);
System.out.println(getInnerXml(countries));
辅助方法 getInnerXml(node) 如下所示:
/**
 * Returns the "inner XML" of a node: the serialized form of each of its child nodes,
 * concatenated in order, without the node's own tag and without an XML declaration.
 *
 * @param node the node whose children are serialized; may itself be a {@link Document}
 * @return the concatenated serialization of the children, or an empty string if there are none
 * @throws NullPointerException if {@code node} is null
 * @throws IllegalStateException if the DOM implementation does not provide the "LS" 3.0 feature
 */
private String getInnerXml(Node node) {
    if (node == null) {
        // Typical cause: getElementsByTagName(...).item(0) found no matching element.
        throw new NullPointerException("node must not be null");
    }

    // getOwnerDocument() returns null for a Document node, so use the document itself in that case.
    Document owner = node.getNodeType() == Node.DOCUMENT_NODE
            ? (Document) node
            : node.getOwnerDocument();

    Object lsFeature = owner.getImplementation().getFeature("LS", "3.0");
    if (!(lsFeature instanceof DOMImplementationLS)) {
        throw new IllegalStateException("DOM implementation does not support the \"LS\" 3.0 feature");
    }

    LSSerializer lsSerializer = ((DOMImplementationLS) lsFeature).createLSSerializer();
    // Suppress the <?xml ...?> header; only the markup of each child is wanted.
    lsSerializer.getDomConfig().setParameter("xml-declaration", false);

    NodeList childNodes = node.getChildNodes();
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < childNodes.getLength(); i++) {
        sb.append(lsSerializer.writeToString(childNodes.item(i)));
    }
    return sb.toString();
}
如果我误解了要求(再次!),请告诉我。
需要提醒的是,这并不是一个很好的解决方案:它需要“手工”(即通过字符串拼接)构建 XML,如果输入出乎意料地不同或更复杂,结果可能很脆弱,甚至会损坏。
我有一个这样的 XML:
<root>
<countries>
<country id="98" nom="Espagne"/>
<country id="76" nom="France"/>
</countries>
</root>
我可以用下面的代码读取根标签,并输出整个 countries 节点(包含它自身的标签):
// Parse the XML file into a DOM tree.
Document xmlDocument = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(XmlFile);

// Report the name of the document's root element.
String rootName = xmlDocument.getDocumentElement().getNodeName();
System.out.println("Root element :" + rootName);

// Take the first <countries> element and print its serialized form.
Node countriesNode = xmlDocument.getElementsByTagName("countries").item(0);
System.out.println(nodeToString(countriesNode));
/**
 * Serializes the given DOM node to a string using a default {@link Transformer},
 * with indentation requested and the XML declaration omitted.
 *
 * @param node the DOM node to serialize
 * @return the text written by the transformer for {@code node}
 * @throws Exception if the transformer cannot be created or the transformation fails
 */
private static String nodeToString(Node node) throws Exception{
    Transformer serializer = TransformerFactory.newInstance().newTransformer();
    serializer.setOutputProperty(OutputKeys.INDENT, "yes");
    serializer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");

    // Collect the transformer output in memory so it can be returned as a String.
    StringWriter buffer = new StringWriter();
    StreamResult target = new StreamResult(buffer);
    serializer.transform(new DOMSource(node), target);
    return buffer.toString();
}
但我无法像下面这样只获取 countries 标签内部的全部内容(不包含 countries 标签本身):
<country id="98" nom="Espagne"/>
<country id="76" nom="France"/>
以下示例将打印 <country id="98" nom="Espagne"/><country id="76" nom="France"/>:
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.ParserConfigurationException;
import org.xml.sax.InputSource;
import java.io.StringReader;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;
import org.w3c.dom.ls.DOMImplementationLS;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.ls.LSSerializer;
...
// Sample input: a <countries> element holding two <country> children.
String sampleXml = "<root><countries><country id=\"98\" nom=\"Espagne\"/><country id=\"76\" nom=\"France\"/></countries></root>";

// Parse the in-memory XML string into a DOM document.
DocumentBuilder parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();
Document parsed = parser.parse(new InputSource(new StringReader(sampleXml)));

// Take the first <countries> element and print the serialized form of its children.
Node countries = parsed.getElementsByTagName("countries").item(0);
System.out.println(getInnerXml(countries));
辅助方法 getInnerXml(node) 如下所示:
/**
 * Returns the "inner XML" of a node: the serialized form of each of its child nodes,
 * concatenated in order, without the node's own tag and without an XML declaration.
 *
 * @param node the node whose children are serialized; may itself be a {@link Document}
 * @return the concatenated serialization of the children, or an empty string if there are none
 * @throws NullPointerException if {@code node} is null
 * @throws IllegalStateException if the DOM implementation does not provide the "LS" 3.0 feature
 */
private String getInnerXml(Node node) {
    if (node == null) {
        // Typical cause: getElementsByTagName(...).item(0) found no matching element.
        throw new NullPointerException("node must not be null");
    }

    // getOwnerDocument() returns null for a Document node, so use the document itself in that case.
    Document owner = node.getNodeType() == Node.DOCUMENT_NODE
            ? (Document) node
            : node.getOwnerDocument();

    Object lsFeature = owner.getImplementation().getFeature("LS", "3.0");
    if (!(lsFeature instanceof DOMImplementationLS)) {
        throw new IllegalStateException("DOM implementation does not support the \"LS\" 3.0 feature");
    }

    LSSerializer lsSerializer = ((DOMImplementationLS) lsFeature).createLSSerializer();
    // Suppress the <?xml ...?> header; only the markup of each child is wanted.
    lsSerializer.getDomConfig().setParameter("xml-declaration", false);

    NodeList childNodes = node.getChildNodes();
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < childNodes.getLength(); i++) {
        sb.append(lsSerializer.writeToString(childNodes.item(i)));
    }
    return sb.toString();
}
如果我误解了要求(再次!),请告诉我。
需要提醒的是,这并不是一个很好的解决方案:它需要“手工”(即通过字符串拼接)构建 XML,如果输入出乎意料地不同或更复杂,结果可能很脆弱,甚至会损坏。