SAX 解析器不遵循引用
SAX parser doesn't follow references
我正在尝试解析来自 xsd 的 HL7 消息定义。我将模式定义分成两个文件。第一个文件包含实际消息定义,第二个文件包含消息中的段定义。
我正在尝试改编此处 https://gist.github.com/helderdarocha/8791651 提供的用于解析 XML 的示例代码。我不明白为什么 SAX 解析器不会跟随(解析)引用。
这是我的 xsd 定义的两个示例。
第一个文件具有以下定义
<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
targetNamespace="http://www.xsd_porcessor.org/parser"
xmlns="http://www.xsd_porcessor.org/parser"
elementFormDefault="qualified"
attributeFormDefault="unqualified">
<xs:include schemaLocation="segments.xsd"/>
<xs:complexType name="ADT.01.MESSAGE">
<xs:sequence>
<xs:element maxOccurs="1" minOccurs="1" ref="MSH"/>
<xs:element maxOccurs="1" minOccurs="1" ref="EVN"/>
<xs:element maxOccurs="1" minOccurs="1" ref="PID"/>
<xs:element maxOccurs="1" minOccurs="1" ref="PV1"/>
<xs:element maxOccurs="1" minOccurs="1" ref="IN1"/>
<xs:element maxOccurs="1" minOccurs="1" ref="IN2"/>
</xs:sequence>
</xs:complexType>
<xs:element name="ADT.A01" type="ADT.01.MESSAGE"/>
</xs:schema>
第二个文件具有以下文件头(header)
<?xml version="1.1" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
targetNamespace="http://www.xsd_porcessor.org/parser"
xmlns="http://www.xsd_porcessor.org/parser"
elementFormDefault="qualified"
attributeFormDefault="unqualified">
...以及表示为复杂类型的大量段定义。下面是一个例子
<xs:complexType name="MSH.SEGMENT">
<xs:sequence>
<xs:element maxOccurs="1" minOccurs="1" ref="MSH.1.FieldSeparator"/>
<xs:element maxOccurs="1" minOccurs="1" ref="MSH.2.ServiceString"/>
<xs:element maxOccurs="1" minOccurs="1" ref="MSH.3.SendingApplication"/>
<xs:element maxOccurs="1" minOccurs="0" ref="MSH.4.SendingFacility"/>
<xs:element maxOccurs="1" minOccurs="0" ref="MSH.5.ReceivingApplication"/>
<xs:element maxOccurs="1" minOccurs="0" ref="MSH.6.ReceivingFacility"/>
<xs:element maxOccurs="1" minOccurs="1" ref="MSH.7.DateTimeOfMessage"/>
<xs:element maxOccurs="1" minOccurs="0" ref="MSH.8.Security"/>
<xs:element maxOccurs="1" minOccurs="1" ref="MSH.9.MessageType"/>
<xs:element maxOccurs="1" minOccurs="1" ref="MSH.10.MessageControlID"/>
<xs:element maxOccurs="1" minOccurs="1" ref="MSH.11.ProcessingID"/>
<xs:element maxOccurs="1" minOccurs="1" ref="MSH.12.VersionID"/>
<xs:element maxOccurs="1" minOccurs="0" ref="MSH.13.SequenceNumber"/>
<xs:element maxOccurs="1" minOccurs="0" ref="MSH.14.ContinuationPointer"/>
<xs:element maxOccurs="1" minOccurs="0" ref="MSH.15.AcceptAcknowledgmentType"/>
<xs:element maxOccurs="1" minOccurs="0" ref="MSH.16.ApplicationAcknowledgmentType"/>
<xs:element maxOccurs="1" minOccurs="0" ref="MSH.17.CountryCode"/>
<xs:element maxOccurs="unbounded" minOccurs="0" ref="MSH.18.CharacterSet"/>
<xs:element maxOccurs="1" minOccurs="0" ref="MSH.19.PrincipalLanguageOfMessage"/>
<xs:element maxOccurs="1" minOccurs="0" ref="MSH.20.AlternateCharacterSetHandlingScheme"/>
<xs:element maxOccurs="unbounded" minOccurs="0" ref="MSH.21.MessageProfileIdentifier"/>
<xs:element maxOccurs="1" minOccurs="0" ref="MSH.22.SendingResponsibleOrganization"/>
<xs:element maxOccurs="1" minOccurs="0" ref="MSH.23.ReceivingResponsibleOrganization"/>
<xs:element maxOccurs="1" minOccurs="0" ref="MSH.24.SendingNetworkAddress"/>
<xs:element maxOccurs="1" minOccurs="0" ref="MSH.25.ReceivingNetworkAddress"/>
</xs:sequence>
</xs:complexType>
<xs:element name="MSH" type="MSH.SEGMENT"/>
下面是经过调整的解析器代码
package ca.parser.xml;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.xml.parsers.*;
import org.xml.sax.*;
import org.xml.sax.helpers.*;
/**
 * Command-line entry point that runs a SAX parse over {@code messages.xsd}
 * and feeds the events to a {@link SchemaSaxHandler}.
 *
 * <p>Only this one document is read here; the schema file is treated as
 * ordinary XML, so any other schema file it mentions is not opened by this
 * class.
 */
public class SAXReaderExample {
    /** Directory, relative to the working directory, that holds the schema files. */
    public static final String PATH = "resources";

    /**
     * Parses {@code resources/messages.xsd} with a {@link SchemaSaxHandler}.
     *
     * @param args ignored
     * @throws ParserConfigurationException if no SAX parser can be created
     * @throws SAXException if the document is not well-formed or the handler fails
     * @throws IOException if the schema file cannot be opened or read
     */
    public static void main(String[] args) throws ParserConfigurationException, SAXException, IOException {
        SAXParserFactory spf = SAXParserFactory.newInstance();
        SAXParser sp = spf.newSAXParser();
        XMLReader reader = sp.getXMLReader();
        reader.setContentHandler(new SchemaSaxHandler());

        File schemaFile = new File(PATH, "messages.xsd");
        // try-with-resources guarantees the stream is released even when the
        // parse aborts before the parser performs its own end-of-parse cleanup.
        try (FileInputStream in = new FileInputStream(schemaFile)) {
            InputSource source = new InputSource(in);
            // A byte stream alone carries no location; the system id gives the
            // parser a base URI and lets error messages name the file.
            source.setSystemId(schemaFile.toURI().toString());
            reader.parse(source);
        }
    }
}
class SchemaSaxHandler extends DefaultHandler {
// temporary - always null when tag closes
private String currentSimpleTypeName;
private String currentSimpleTypeBaseType;
private SchemaElement currentElement;
private SchemaComplexType currentComplexType;
private List<SchemaElement> currentSequence;
// cumulative - will use the data when XML finishes
private Map<String, String> simpleTypes = new HashMap<>();
private Map<String, SchemaComplexType> complexTypes = new HashMap<>();
private SchemaElement rootElement;
@Override
public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
if (qName.equals("xs:simpleType")) {
currentSimpleTypeName = atts.getValue("name");
}
if (qName.equals("xs:restriction")) {
currentSimpleTypeBaseType = atts.getValue("base");
}
if (qName.equals("xs:complexType")) {
currentComplexType = new SchemaComplexType();
currentComplexType.setName(atts.getValue("name"));
}
if (qName.equals("xs:sequence")) {
currentSequence = new ArrayList<>();
}
if (qName.equals("xs:element")) {
currentElement = new SchemaElement();
if (atts.getValue("name")==null) {
currentElement.setName(atts.getValue("ref"));
}else {
currentElement.setName(atts.getValue("name"));
}
currentElement.setType(atts.getValue("type"));
currentElement.setReference(atts.getValue("ref"));
if (currentSequence != null) {
currentSequence.add(currentElement);
} else {
rootElement = currentElement;
}
}
if (qName.equals("xs:attribute")) {
currentComplexType.addAttribute(atts.getValue("name"), atts.getValue("type"));
}
}
@Override
public void endElement(String uri, String localName, String qName) throws SAXException {
if (qName.equals("xs:simpleType")) {
simpleTypes.put(currentSimpleTypeName, currentSimpleTypeBaseType);
currentSimpleTypeName = null;
currentSimpleTypeBaseType = null;
}
if (qName.equals("xs:complexType")) {
complexTypes.put(currentComplexType.getName(), currentComplexType);
currentComplexType = null;
}
if (qName.equals("xs:sequence")) {
if (currentComplexType != null) {
currentComplexType.setChildren(currentSequence);
}
currentSequence = null;
}
}
@Override
public void endDocument() throws SAXException {
makeTree(rootElement);
printTree(rootElement, "");
}
public void makeTree(SchemaElement element) {
SchemaComplexType type = complexTypes.get(element.getType());
if (type != null) {
List<SchemaElement> children = type.getChildren();
element.setChildren(children);
for (SchemaElement child : children) {
makeTree(child);
}
element.setAttributes(type.getAttributes());
} else {
element.setType(simpleTypes.get(element.getType()));
}
}
private void printTree(SchemaElement element, String indent) {
System.out.println(indent + element.getName() + " : " + element.getType());
Map<String, String> attributes = element.getAttributes();
if (attributes != null) {
for (Map.Entry<String, String> entry : attributes.entrySet()) {
System.out.println(" @" + entry.getKey() + " : " + simpleTypes.get(entry.getValue()));
}
}
List<SchemaElement> children = element.getChildren();
if (children != null) {
for (SchemaElement child : children) {
printTree(child, indent + " ");
}
}
}
/**
 * Mutable value holder describing one element of a schema: its name, its
 * type, the name it refers to, and the children and attributes attached to
 * it. Every property starts out {@code null} and is stored and returned
 * as-is, without copying.
 */
class SchemaElement {
    private String name;
    private String type;
    private String reference;
    private List<SchemaElement> children;
    private Map<String, String> attributes;

    /** @return the element name, or {@code null} if none was set */
    public String getName() {
        return name;
    }

    /** @param name the element name to store */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the type name, or {@code null} if none was set */
    public String getType() {
        return type;
    }

    /** @param type the type name to store */
    public void setType(String type) {
        this.type = type;
    }

    /** @return the referenced name, or {@code null} if none was set */
    public String getReference() {
        return reference;
    }

    /** @param reference the referenced name to store */
    public void setReference(String reference) {
        this.reference = reference;
    }

    /** @return the child list exactly as it was set, or {@code null} */
    public List<SchemaElement> getChildren() {
        return children;
    }

    /** @param children the child list to store (kept by reference) */
    public void setChildren(List<SchemaElement> children) {
        this.children = children;
    }

    /** @return the attribute map exactly as it was set, or {@code null} */
    public Map<String, String> getAttributes() {
        return attributes;
    }

    /** @param attributes the attribute map to store (kept by reference) */
    public void setAttributes(Map<String, String> attributes) {
        this.attributes = attributes;
    }
}
/**
 * Mutable value holder describing one complex type: its name, a referenced
 * name, its child elements and its attributes. The attribute map starts out
 * empty; every other property starts out {@code null}.
 */
class SchemaComplexType {
    private String name;
    private String reference;
    private List<SchemaElement> children;
    private Map<String, String> attributes = new HashMap<>();

    /** @return the type name, or {@code null} if none was set */
    public String getName() {
        return name;
    }

    /** @param name the type name to store */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the referenced name, or {@code null} if none was set */
    public String getReference() {
        return reference;
    }

    /** @param reference the referenced name to store */
    public void setReference(String reference) {
        this.reference = reference;
    }

    /** @return the child list exactly as it was set, or {@code null} */
    public List<SchemaElement> getChildren() {
        return children;
    }

    /** @param children the child list to store (kept by reference) */
    public void setChildren(List<SchemaElement> children) {
        this.children = children;
    }

    /** @return the attribute map currently held by this type */
    public Map<String, String> getAttributes() {
        return attributes;
    }

    /** @param attributes the map that replaces the current attribute map */
    public void setAttributes(Map<String, String> attributes) {
        this.attributes = attributes;
    }

    /**
     * Stores one attribute in the current attribute map.
     *
     * @param name map key: the attribute name
     * @param type map value: the attribute type
     */
    public void addAttribute(String name, String type) {
        attributes.put(name, type);
    }
}
有什么想法吗?感谢您的帮助。
谢谢。
听起来这里有两个不同的概念在起作用。
如果是使用带验证功能的 SAX 解析器来解析一段 XML,并根据其模式对它进行验证:
<xmlRootElement
xmlns="http://www.xsd_porcessor.org/parser"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.xsd_porcessor.org/parser messages.xsd">
……等等,那么很明显,当该模式在幕后被解析时,解析器需要跟随其中的所有引用和导入。
但是,如果 .xsd 本身就是被解析的 XML,那么正如您已经发现的那样,它的元素会被直接传递给 ContentHandler。上面的 SchemaSaxHandler 需要做更多的工作来记录每个 xs:element(就像您已经为 simpleTypes 和 complexTypes 这两个映射(Map)所做的那样),这样之后才能解析通过 ref 引用的元素。
如果您需要的是一个已解析好元素和类型的 XML 模式模型,那么值得研究一下 XML 解析器(例如 Xerces)内部使用的模式模型。作为起点,下面的代码使用了 XNI(Xerces Native Interface,Xerces 本机接口):
// Directory against which messages.xsd and every resolved schema location are looked up.
final File baseDir = new File("/myschemas");

// Resolver handed to the schema loader: maps each requested literal system id
// onto a file of the same name inside baseDir.
XMLEntityResolver entityResolver = new XMLEntityResolver() {
    @Override
    public XMLInputSource resolveEntity(XMLResourceIdentifier resourceIdentifier)
            throws XNIException, IOException {
        // E.g. resourceIdentifier.getLiteralSystemId() will be segments.xsd
        File requested = new File(baseDir, resourceIdentifier.getLiteralSystemId());
        String uri = requested.toURI().toString();
        return new XMLInputSource(null, uri, null);
    }
};

XMLSchemaLoader loader = new XMLSchemaLoader();
loader.setEntityResolver(entityResolver);

// Load the top-level schema by URI and print its element declarations.
String schemaUri = new File(baseDir, "messages.xsd").toURI().toString();
XSModel model = loader.loadURI(schemaUri);
System.out.println(model.getComponents(XSConstants.ELEMENT_DECLARATION));
其输出类似于:
{http://www.xsd_porcessor.org/parser}ADT.A01="http://www.xsd_porcessor.org/parser":ADT.A01
我正在尝试解析来自 xsd 的 HL7 消息定义。我将模式定义分成两个文件。第一个文件包含实际消息定义,第二个文件包含消息中的段定义。
我正在尝试改编此处 https://gist.github.com/helderdarocha/8791651 提供的用于解析 XML 的示例代码。我不明白为什么 SAX 解析器不会跟随(解析)引用。
这是我的 xsd 定义的两个示例。
第一个文件具有以下定义
<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
targetNamespace="http://www.xsd_porcessor.org/parser"
xmlns="http://www.xsd_porcessor.org/parser"
elementFormDefault="qualified"
attributeFormDefault="unqualified">
<xs:include schemaLocation="segments.xsd"/>
<xs:complexType name="ADT.01.MESSAGE">
<xs:sequence>
<xs:element maxOccurs="1" minOccurs="1" ref="MSH"/>
<xs:element maxOccurs="1" minOccurs="1" ref="EVN"/>
<xs:element maxOccurs="1" minOccurs="1" ref="PID"/>
<xs:element maxOccurs="1" minOccurs="1" ref="PV1"/>
<xs:element maxOccurs="1" minOccurs="1" ref="IN1"/>
<xs:element maxOccurs="1" minOccurs="1" ref="IN2"/>
</xs:sequence>
</xs:complexType>
<xs:element name="ADT.A01" type="ADT.01.MESSAGE"/>
</xs:schema>
第二个文件具有以下文件头(header)
<?xml version="1.1" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
targetNamespace="http://www.xsd_porcessor.org/parser"
xmlns="http://www.xsd_porcessor.org/parser"
elementFormDefault="qualified"
attributeFormDefault="unqualified">
...以及表示为复杂类型的大量段定义。下面是一个例子
<xs:complexType name="MSH.SEGMENT">
<xs:sequence>
<xs:element maxOccurs="1" minOccurs="1" ref="MSH.1.FieldSeparator"/>
<xs:element maxOccurs="1" minOccurs="1" ref="MSH.2.ServiceString"/>
<xs:element maxOccurs="1" minOccurs="1" ref="MSH.3.SendingApplication"/>
<xs:element maxOccurs="1" minOccurs="0" ref="MSH.4.SendingFacility"/>
<xs:element maxOccurs="1" minOccurs="0" ref="MSH.5.ReceivingApplication"/>
<xs:element maxOccurs="1" minOccurs="0" ref="MSH.6.ReceivingFacility"/>
<xs:element maxOccurs="1" minOccurs="1" ref="MSH.7.DateTimeOfMessage"/>
<xs:element maxOccurs="1" minOccurs="0" ref="MSH.8.Security"/>
<xs:element maxOccurs="1" minOccurs="1" ref="MSH.9.MessageType"/>
<xs:element maxOccurs="1" minOccurs="1" ref="MSH.10.MessageControlID"/>
<xs:element maxOccurs="1" minOccurs="1" ref="MSH.11.ProcessingID"/>
<xs:element maxOccurs="1" minOccurs="1" ref="MSH.12.VersionID"/>
<xs:element maxOccurs="1" minOccurs="0" ref="MSH.13.SequenceNumber"/>
<xs:element maxOccurs="1" minOccurs="0" ref="MSH.14.ContinuationPointer"/>
<xs:element maxOccurs="1" minOccurs="0" ref="MSH.15.AcceptAcknowledgmentType"/>
<xs:element maxOccurs="1" minOccurs="0" ref="MSH.16.ApplicationAcknowledgmentType"/>
<xs:element maxOccurs="1" minOccurs="0" ref="MSH.17.CountryCode"/>
<xs:element maxOccurs="unbounded" minOccurs="0" ref="MSH.18.CharacterSet"/>
<xs:element maxOccurs="1" minOccurs="0" ref="MSH.19.PrincipalLanguageOfMessage"/>
<xs:element maxOccurs="1" minOccurs="0" ref="MSH.20.AlternateCharacterSetHandlingScheme"/>
<xs:element maxOccurs="unbounded" minOccurs="0" ref="MSH.21.MessageProfileIdentifier"/>
<xs:element maxOccurs="1" minOccurs="0" ref="MSH.22.SendingResponsibleOrganization"/>
<xs:element maxOccurs="1" minOccurs="0" ref="MSH.23.ReceivingResponsibleOrganization"/>
<xs:element maxOccurs="1" minOccurs="0" ref="MSH.24.SendingNetworkAddress"/>
<xs:element maxOccurs="1" minOccurs="0" ref="MSH.25.ReceivingNetworkAddress"/>
</xs:sequence>
</xs:complexType>
<xs:element name="MSH" type="MSH.SEGMENT"/>
下面是经过调整的解析器代码
package ca.parser.xml;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.xml.parsers.*;
import org.xml.sax.*;
import org.xml.sax.helpers.*;
/**
 * Command-line entry point that runs a SAX parse over {@code messages.xsd}
 * and feeds the events to a {@link SchemaSaxHandler}.
 *
 * <p>Only this one document is read here; the schema file is treated as
 * ordinary XML, so any other schema file it mentions is not opened by this
 * class.
 */
public class SAXReaderExample {
    /** Directory, relative to the working directory, that holds the schema files. */
    public static final String PATH = "resources";

    /**
     * Parses {@code resources/messages.xsd} with a {@link SchemaSaxHandler}.
     *
     * @param args ignored
     * @throws ParserConfigurationException if no SAX parser can be created
     * @throws SAXException if the document is not well-formed or the handler fails
     * @throws IOException if the schema file cannot be opened or read
     */
    public static void main(String[] args) throws ParserConfigurationException, SAXException, IOException {
        SAXParserFactory spf = SAXParserFactory.newInstance();
        SAXParser sp = spf.newSAXParser();
        XMLReader reader = sp.getXMLReader();
        reader.setContentHandler(new SchemaSaxHandler());

        File schemaFile = new File(PATH, "messages.xsd");
        // try-with-resources guarantees the stream is released even when the
        // parse aborts before the parser performs its own end-of-parse cleanup.
        try (FileInputStream in = new FileInputStream(schemaFile)) {
            InputSource source = new InputSource(in);
            // A byte stream alone carries no location; the system id gives the
            // parser a base URI and lets error messages name the file.
            source.setSystemId(schemaFile.toURI().toString());
            reader.parse(source);
        }
    }
}
class SchemaSaxHandler extends DefaultHandler {
// temporary - always null when tag closes
private String currentSimpleTypeName;
private String currentSimpleTypeBaseType;
private SchemaElement currentElement;
private SchemaComplexType currentComplexType;
private List<SchemaElement> currentSequence;
// cumulative - will use the data when XML finishes
private Map<String, String> simpleTypes = new HashMap<>();
private Map<String, SchemaComplexType> complexTypes = new HashMap<>();
private SchemaElement rootElement;
@Override
public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
if (qName.equals("xs:simpleType")) {
currentSimpleTypeName = atts.getValue("name");
}
if (qName.equals("xs:restriction")) {
currentSimpleTypeBaseType = atts.getValue("base");
}
if (qName.equals("xs:complexType")) {
currentComplexType = new SchemaComplexType();
currentComplexType.setName(atts.getValue("name"));
}
if (qName.equals("xs:sequence")) {
currentSequence = new ArrayList<>();
}
if (qName.equals("xs:element")) {
currentElement = new SchemaElement();
if (atts.getValue("name")==null) {
currentElement.setName(atts.getValue("ref"));
}else {
currentElement.setName(atts.getValue("name"));
}
currentElement.setType(atts.getValue("type"));
currentElement.setReference(atts.getValue("ref"));
if (currentSequence != null) {
currentSequence.add(currentElement);
} else {
rootElement = currentElement;
}
}
if (qName.equals("xs:attribute")) {
currentComplexType.addAttribute(atts.getValue("name"), atts.getValue("type"));
}
}
@Override
public void endElement(String uri, String localName, String qName) throws SAXException {
if (qName.equals("xs:simpleType")) {
simpleTypes.put(currentSimpleTypeName, currentSimpleTypeBaseType);
currentSimpleTypeName = null;
currentSimpleTypeBaseType = null;
}
if (qName.equals("xs:complexType")) {
complexTypes.put(currentComplexType.getName(), currentComplexType);
currentComplexType = null;
}
if (qName.equals("xs:sequence")) {
if (currentComplexType != null) {
currentComplexType.setChildren(currentSequence);
}
currentSequence = null;
}
}
@Override
public void endDocument() throws SAXException {
makeTree(rootElement);
printTree(rootElement, "");
}
public void makeTree(SchemaElement element) {
SchemaComplexType type = complexTypes.get(element.getType());
if (type != null) {
List<SchemaElement> children = type.getChildren();
element.setChildren(children);
for (SchemaElement child : children) {
makeTree(child);
}
element.setAttributes(type.getAttributes());
} else {
element.setType(simpleTypes.get(element.getType()));
}
}
private void printTree(SchemaElement element, String indent) {
System.out.println(indent + element.getName() + " : " + element.getType());
Map<String, String> attributes = element.getAttributes();
if (attributes != null) {
for (Map.Entry<String, String> entry : attributes.entrySet()) {
System.out.println(" @" + entry.getKey() + " : " + simpleTypes.get(entry.getValue()));
}
}
List<SchemaElement> children = element.getChildren();
if (children != null) {
for (SchemaElement child : children) {
printTree(child, indent + " ");
}
}
}
/**
 * Mutable value holder describing one element of a schema: its name, its
 * type, the name it refers to, and the children and attributes attached to
 * it. Every property starts out {@code null} and is stored and returned
 * as-is, without copying.
 */
class SchemaElement {
    private String name;
    private String type;
    private String reference;
    private List<SchemaElement> children;
    private Map<String, String> attributes;

    /** @return the element name, or {@code null} if none was set */
    public String getName() {
        return name;
    }

    /** @param name the element name to store */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the type name, or {@code null} if none was set */
    public String getType() {
        return type;
    }

    /** @param type the type name to store */
    public void setType(String type) {
        this.type = type;
    }

    /** @return the referenced name, or {@code null} if none was set */
    public String getReference() {
        return reference;
    }

    /** @param reference the referenced name to store */
    public void setReference(String reference) {
        this.reference = reference;
    }

    /** @return the child list exactly as it was set, or {@code null} */
    public List<SchemaElement> getChildren() {
        return children;
    }

    /** @param children the child list to store (kept by reference) */
    public void setChildren(List<SchemaElement> children) {
        this.children = children;
    }

    /** @return the attribute map exactly as it was set, or {@code null} */
    public Map<String, String> getAttributes() {
        return attributes;
    }

    /** @param attributes the attribute map to store (kept by reference) */
    public void setAttributes(Map<String, String> attributes) {
        this.attributes = attributes;
    }
}
/**
 * Mutable value holder describing one complex type: its name, a referenced
 * name, its child elements and its attributes. The attribute map starts out
 * empty; every other property starts out {@code null}.
 */
class SchemaComplexType {
    private String name;
    private String reference;
    private List<SchemaElement> children;
    private Map<String, String> attributes = new HashMap<>();

    /** @return the type name, or {@code null} if none was set */
    public String getName() {
        return name;
    }

    /** @param name the type name to store */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the referenced name, or {@code null} if none was set */
    public String getReference() {
        return reference;
    }

    /** @param reference the referenced name to store */
    public void setReference(String reference) {
        this.reference = reference;
    }

    /** @return the child list exactly as it was set, or {@code null} */
    public List<SchemaElement> getChildren() {
        return children;
    }

    /** @param children the child list to store (kept by reference) */
    public void setChildren(List<SchemaElement> children) {
        this.children = children;
    }

    /** @return the attribute map currently held by this type */
    public Map<String, String> getAttributes() {
        return attributes;
    }

    /** @param attributes the map that replaces the current attribute map */
    public void setAttributes(Map<String, String> attributes) {
        this.attributes = attributes;
    }

    /**
     * Stores one attribute in the current attribute map.
     *
     * @param name map key: the attribute name
     * @param type map value: the attribute type
     */
    public void addAttribute(String name, String type) {
        attributes.put(name, type);
    }
}
有什么想法吗?感谢您的帮助。
谢谢。
听起来这里有两个不同的概念在起作用。
如果是使用带验证功能的 SAX 解析器来解析一段 XML,并根据其模式对它进行验证:
<xmlRootElement
xmlns="http://www.xsd_porcessor.org/parser"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.xsd_porcessor.org/parser messages.xsd">
……等等,那么很明显,当该模式在幕后被解析时,解析器需要跟随其中的所有引用和导入。
但是,如果 .xsd 本身就是被解析的 XML,那么正如您已经发现的那样,它的元素会被直接传递给 ContentHandler。上面的 SchemaSaxHandler 需要做更多的工作来记录每个 xs:element(就像您已经为 simpleTypes 和 complexTypes 这两个映射(Map)所做的那样),这样之后才能解析通过 ref 引用的元素。
如果您需要的是一个已解析好元素和类型的 XML 模式模型,那么值得研究一下 XML 解析器(例如 Xerces)内部使用的模式模型。作为起点,下面的代码使用了 XNI(Xerces Native Interface,Xerces 本机接口):
// Directory against which messages.xsd and every resolved schema location are looked up.
final File baseDir = new File("/myschemas");

// Resolver handed to the schema loader: maps each requested literal system id
// onto a file of the same name inside baseDir.
XMLEntityResolver entityResolver = new XMLEntityResolver() {
    @Override
    public XMLInputSource resolveEntity(XMLResourceIdentifier resourceIdentifier)
            throws XNIException, IOException {
        // E.g. resourceIdentifier.getLiteralSystemId() will be segments.xsd
        File requested = new File(baseDir, resourceIdentifier.getLiteralSystemId());
        String uri = requested.toURI().toString();
        return new XMLInputSource(null, uri, null);
    }
};

XMLSchemaLoader loader = new XMLSchemaLoader();
loader.setEntityResolver(entityResolver);

// Load the top-level schema by URI and print its element declarations.
String schemaUri = new File(baseDir, "messages.xsd").toURI().toString();
XSModel model = loader.loadURI(schemaUri);
System.out.println(model.getComponents(XSConstants.ELEMENT_DECLARATION));
其输出类似于:
{http://www.xsd_porcessor.org/parser}ADT.A01="http://www.xsd_porcessor.org/parser":ADT.A01