使用 XPath 遍历 XML 个文档而不打印重复项
Iterating through XML document with XPath without printing duplicates
我有这个 XML 文档:
<?xml version="1.0" encoding="UTF-8"?>
<tns:request xmlns:tns="urn">
<tns:CorrectingData>
<tns:CorrectingDataBlock>
<tns:CurrentVersionData>current</tns:CurrentVersionData>
<tns:NewVersionData>new</tns:NewVersionData>
</tns:CorrectingDataBlock>
<tns:CorrectingDataBlock>
<tns:CurrentVersionData>100</tns:CurrentVersionData>
<tns:NewVersionData>200</tns:NewVersionData>
</tns:CorrectingDataBlock>
</tns:CorrectingData>
</tns:request>
和对应的XSD文档:
<?xml version="1.0" encoding="UTF-8"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<xsd:element name="request" type="tns:requestType">
</xsd:element>
<xsd:complexType name="requestType">
<xsd:sequence>
<xsd:element name="CorrectingData" type="tns:CorrectingDataType" minOccurs="0" maxOccurs="1">
</xsd:element>
</xsd:sequence>
</xsd:complexType>
<xsd:complexType name="CorrectingDataType">
<xsd:sequence>
<xsd:element name="CorrectingDataBlock" type="tns:CorrectingDataTextType" minOccurs="1" maxOccurs="unbounded"/>
</xsd:sequence>
</xsd:complexType>
<xsd:complexType name="CorrectingDataTextType">
<xsd:sequence>
<xsd:element name="CurrentVersionData" type="tns:string">
</xsd:element>
<xsd:element name="NewVersionData" type="tns:string">
</xsd:element>
</xsd:sequence>
</xsd:complexType>
</xsd:schema>
我需要在编辑表单中显示整个 XSD 文档,并尽可能插入 XML 文档中的数据。 在我的示例中,我简化了代码,以便它将叶项的值输出到控制台。
为了从 XML 文档中插入数据,我想遍历每个 tns:CorrectingDataBlock
元素并打印其叶子的值(tns:CurrentVersionData
和 tns:NewVersionData
)。 我需要这样的输出:
current
new
100
200
我有这个 javascript 代码,它遍历 XSD 文档并创建 XPath,所以我可以用它从 XML 文档中查找和打印叶子的值。
// ...
// dataXPath = `/*[local-name()='${childElement.getAttribute("name")}']`
// I'm using recursive method, which fills dataXPath from root and passes it to child node all the way to the leaf.
// After I reach the leaf node I print its value.
const childElementDataXPath: string = dataXPath + `/*[local-name()='${childElement.getAttribute("name")}']`;
const snapshotXPathResult: XPathResult = this._dataDocument.evaluate(childElementDataXPath, this._dataDocument, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
for (let i: number = 0; i < snapshotXPathResult.snapshotLength; i++) {
const node: Element = snapshotXPathResult.snapshotItem(i) as Element;
console.log(node.textContent);
}
// ...
此代码创建的结果 XPath 是:
/*[local-name()='request']/*[local-name()='CorrectingData']/*[local-name()='CorrectingDataBlock']/*[local-name()='CurrentVersionData']
和
/*[local-name()='request']/*[local-name()='CorrectingData']/*[local-name()='CorrectingDataBlock']/*[local-name()='NewVersionData']
并且代码生成此输出:
current
100
new
200
current
100
new
200
问题:如何更改我的代码以获得我想要的?我做错了什么?
备注:
- 我应该在 XPath 中使用
local-name()
函数,因为在执行时我不知道 XML 文档的命名空间。
完整代码清单:
export class Parser {
public _schemeDocument: Document;
public _dataDocument: Document;
public processElement(element: Element, dataXPath: string): void {
const typeName: string = this._getElementTypeName(element);
const typeElement: Element = this._getTypeElementByName(typeName);
dataXPath += `/*[local-name()='${element.getAttribute("name")}']`;
if (typeElement && this._isComplexType(typeElement)) {
const sequence = this._schemeDocument.evaluate("./*[local-name()='sequence']", typeElement).iterateNext();
if (sequence) {
Array.prototype.forEach.call((sequence as Element).children, (childElement: Element) => {
if (this._isElement(childElement)) {
const childElementDataXPath: string = dataXPath + `/*[local-name()='${childElement.getAttribute("name")}']`;
const snapshotXPathResult: XPathResult = this._dataDocument.evaluate(childElementDataXPath, this._dataDocument, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
const childTypeName: string = this._getElementTypeName(childElement);
const childTypeElement: Element = this._getTypeElementByName(childTypeName);
if (childTypeElement && this._isComplexType(childTypeElement)) {
for (let i: number = 0; i < snapshotXPathResult.snapshotLength; i++) {
this.processElement(childElement, dataXPath);
}
} else {
const childElementCaption: string = this._getElementCaption(childElement);
for (let i: number = 0; i < snapshotXPathResult.snapshotLength; i++) {
const node: Element = snapshotXPathResult.snapshotItem(i) as Element;
const childElementValue: string = node ? node.textContent : "EMPTY";
console.log(childElementCaption + ": " + childElementValue);
}
}
}
});
}
}
}
private _getElementTypeName(element: Element): string {
const splittedTypeName: string[] = element.getAttribute("type").split(":");
return splittedTypeName.length > 1 ? splittedTypeName[1] : splittedTypeName[0];
}
private _getTypeElementByName(typeName: string): Element {
const simpleTypeXPath = `//*[local-name()='simpleType'][@name='${typeName}']`;
const complexTypeXPath = `//*[local-name()='complexType'][@name='${typeName}']`;
return this._schemeDocument.evaluate(`${simpleTypeXPath}|${complexTypeXPath}`, this._schemeDocument).iterateNext() as Element;
}
private _getElementCaption(element: Element): string {
const elementCaption: Node = this._schemeDocument.evaluate(".//*[local-name()='documentation']", element).iterateNext();
return elementCaption ? elementCaption.textContent : "EMPTY";
}
private _isComplexType(element: Element): boolean {
return element.localName === "complexType";
}
private _isElement(element: Element): boolean {
return element.localName === "element";
}
}
为什么不直接使用 DOM 方法:
const xmlSource = `<?xml version="1.0" encoding="UTF-8"?>
<tns:request xmlns:tns="urn">
<tns:CorrectingData>
<tns:CorrectingDataBlock>
<tns:CurrentVersionData>current</tns:CurrentVersionData>
<tns:NewVersionData>new</tns:NewVersionData>
</tns:CorrectingDataBlock>
<tns:CorrectingDataBlock>
<tns:CurrentVersionData>100</tns:CurrentVersionData>
<tns:NewVersionData>200</tns:NewVersionData>
</tns:CorrectingDataBlock>
</tns:CorrectingData>
</tns:request>`;
const xmlDoc = new DOMParser().parseFromString(xmlSource, 'application/xml');
const blockList = xmlDoc.getElementsByTagNameNS('*', 'CorrectingDataBlock');
for (let i = 0; i < blockList.length; i++) {
Array.from(blockList[i].children).forEach(c => console.log(c.textContent));
}
我将 processElement()
方法更改为:
public processElement(element: Element, dataXPath: string, numberOfDataElementsOfSameType: number = 1, elementDataIndex: number = 0): void {
const typeName: string = this._getElementTypeName(element);
const typeElement: Element = this._getTypeElementByName(typeName);
if (numberOfDataElementsOfSameType > 1) {
dataXPath += `/*[local-name()='${element.getAttribute("name")}'][${elementDataIndex + 1}]`;
} else {
dataXPath += `/*[local-name()='${element.getAttribute("name")}']`;
}
if (typeElement && this._isComplexType(typeElement)) {
const sequenceElement: Element = this._schemeDocument.evaluate("./*[local-name()='sequence']", typeElement).iterateNext() as Element;
if (sequenceElement) {
Array.prototype.forEach.call(sequenceElement.children, (childElement: Element) => {
const childDataXPath: string = dataXPath + `/*[local-name()='${childElement.getAttribute("name")}']`;
const childResult: XPathResult = this._dataDocument.evaluate(childDataXPath, this._dataDocument, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
const childTypeName: string = this._getElementTypeName(childElement);
const childTypeElement: Element = this._getTypeElementByName(childTypeName);
if (childTypeElement && this._isComplexType(childTypeElement)) {
for (let i: number = 0; i < childResult.snapshotLength; i++) {
if (childResult.snapshotLength > 1) {
this.processElement(childElement, dataXPath, childResult.snapshotLength, i);
} else {
this.processElement(childElement, dataXPath, 1, 0);
}
}
} else {
const childElementCaption: string = this._getElementCaption(childElement);
for (let i: number = 0; i < childResult.snapshotLength; i++) {
const childDataElement: Element = childResult.snapshotItem(i) as Element;
const childDataElementValue: string = childDataElement ? childDataElement.textContent : "EMPTY";
console.log(childElementCaption + ": " + childDataElementValue);
}
}
});
}
}
}
主要变化是 XPath 现在包含 XML 文档中元素的索引。这有助于避免重复的叶元素。在具有多个 tns:CorrectingDataBlock
元素的示例中,新的 XPath 如下所示:
/*[local-name()='request']/*[local-name()='CorrectingData']/*[local-name()='CorrectingDataBlock'][1]/*[local-name()='CurrentVersionData']
我有这个 XML 文档:
<?xml version="1.0" encoding="UTF-8"?>
<tns:request xmlns:tns="urn">
<tns:CorrectingData>
<tns:CorrectingDataBlock>
<tns:CurrentVersionData>current</tns:CurrentVersionData>
<tns:NewVersionData>new</tns:NewVersionData>
</tns:CorrectingDataBlock>
<tns:CorrectingDataBlock>
<tns:CurrentVersionData>100</tns:CurrentVersionData>
<tns:NewVersionData>200</tns:NewVersionData>
</tns:CorrectingDataBlock>
</tns:CorrectingData>
</tns:request>
和对应的XSD文档:
<?xml version="1.0" encoding="UTF-8"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<xsd:element name="request" type="tns:requestType">
</xsd:element>
<xsd:complexType name="requestType">
<xsd:sequence>
<xsd:element name="CorrectingData" type="tns:CorrectingDataType" minOccurs="0" maxOccurs="1">
</xsd:element>
</xsd:sequence>
</xsd:complexType>
<xsd:complexType name="CorrectingDataType">
<xsd:sequence>
<xsd:element name="CorrectingDataBlock" type="tns:CorrectingDataTextType" minOccurs="1" maxOccurs="unbounded"/>
</xsd:sequence>
</xsd:complexType>
<xsd:complexType name="CorrectingDataTextType">
<xsd:sequence>
<xsd:element name="CurrentVersionData" type="tns:string">
</xsd:element>
<xsd:element name="NewVersionData" type="tns:string">
</xsd:element>
</xsd:sequence>
</xsd:complexType>
</xsd:schema>
我需要在编辑表单中显示整个 XSD 文档,并尽可能插入 XML 文档中的数据。 在我的示例中,我简化了代码,以便它将叶项的值输出到控制台。
为了从 XML 文档中插入数据,我想遍历每个 tns:CorrectingDataBlock
元素并打印其叶子的值(tns:CurrentVersionData
和 tns:NewVersionData
)。 我需要这样的输出:
current
new
100
200
我有这个 javascript 代码,它遍历 XSD 文档并创建 XPath,所以我可以用它从 XML 文档中查找和打印叶子的值。
// ...
// dataXPath = `/*[local-name()='${childElement.getAttribute("name")}']`
// I'm using recursive method, which fills dataXPath from root and passes it to child node all the way to the leaf.
// After I reach the leaf node I print its value.
const childElementDataXPath: string = dataXPath + `/*[local-name()='${childElement.getAttribute("name")}']`;
const snapshotXPathResult: XPathResult = this._dataDocument.evaluate(childElementDataXPath, this._dataDocument, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
for (let i: number = 0; i < snapshotXPathResult.snapshotLength; i++) {
const node: Element = snapshotXPathResult.snapshotItem(i) as Element;
console.log(node.textContent);
}
// ...
此代码创建的结果 XPath 是:
/*[local-name()='request']/*[local-name()='CorrectingData']/*[local-name()='CorrectingDataBlock']/*[local-name()='CurrentVersionData']
和
/*[local-name()='request']/*[local-name()='CorrectingData']/*[local-name()='CorrectingDataBlock']/*[local-name()='NewVersionData']
并且代码生成此输出:
current
100
new
200
current
100
new
200
问题:如何更改我的代码以获得我想要的?我做错了什么?
备注:
- 我应该在 XPath 中使用
local-name()
函数,因为在执行时我不知道 XML 文档的命名空间。
完整代码清单:
export class Parser {
public _schemeDocument: Document;
public _dataDocument: Document;
public processElement(element: Element, dataXPath: string): void {
const typeName: string = this._getElementTypeName(element);
const typeElement: Element = this._getTypeElementByName(typeName);
dataXPath += `/*[local-name()='${element.getAttribute("name")}']`;
if (typeElement && this._isComplexType(typeElement)) {
const sequence = this._schemeDocument.evaluate("./*[local-name()='sequence']", typeElement).iterateNext();
if (sequence) {
Array.prototype.forEach.call((sequence as Element).children, (childElement: Element) => {
if (this._isElement(childElement)) {
const childElementDataXPath: string = dataXPath + `/*[local-name()='${childElement.getAttribute("name")}']`;
const snapshotXPathResult: XPathResult = this._dataDocument.evaluate(childElementDataXPath, this._dataDocument, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
const childTypeName: string = this._getElementTypeName(childElement);
const childTypeElement: Element = this._getTypeElementByName(childTypeName);
if (childTypeElement && this._isComplexType(childTypeElement)) {
for (let i: number = 0; i < snapshotXPathResult.snapshotLength; i++) {
this.processElement(childElement, dataXPath);
}
} else {
const childElementCaption: string = this._getElementCaption(childElement);
for (let i: number = 0; i < snapshotXPathResult.snapshotLength; i++) {
const node: Element = snapshotXPathResult.snapshotItem(i) as Element;
const childElementValue: string = node ? node.textContent : "EMPTY";
console.log(childElementCaption + ": " + childElementValue);
}
}
}
});
}
}
}
private _getElementTypeName(element: Element): string {
const splittedTypeName: string[] = element.getAttribute("type").split(":");
return splittedTypeName.length > 1 ? splittedTypeName[1] : splittedTypeName[0];
}
private _getTypeElementByName(typeName: string): Element {
const simpleTypeXPath = `//*[local-name()='simpleType'][@name='${typeName}']`;
const complexTypeXPath = `//*[local-name()='complexType'][@name='${typeName}']`;
return this._schemeDocument.evaluate(`${simpleTypeXPath}|${complexTypeXPath}`, this._schemeDocument).iterateNext() as Element;
}
private _getElementCaption(element: Element): string {
const elementCaption: Node = this._schemeDocument.evaluate(".//*[local-name()='documentation']", element).iterateNext();
return elementCaption ? elementCaption.textContent : "EMPTY";
}
private _isComplexType(element: Element): boolean {
return element.localName === "complexType";
}
private _isElement(element: Element): boolean {
return element.localName === "element";
}
}
为什么不直接使用 DOM 方法:
const xmlSource = `<?xml version="1.0" encoding="UTF-8"?>
<tns:request xmlns:tns="urn">
<tns:CorrectingData>
<tns:CorrectingDataBlock>
<tns:CurrentVersionData>current</tns:CurrentVersionData>
<tns:NewVersionData>new</tns:NewVersionData>
</tns:CorrectingDataBlock>
<tns:CorrectingDataBlock>
<tns:CurrentVersionData>100</tns:CurrentVersionData>
<tns:NewVersionData>200</tns:NewVersionData>
</tns:CorrectingDataBlock>
</tns:CorrectingData>
</tns:request>`;
const xmlDoc = new DOMParser().parseFromString(xmlSource, 'application/xml');
const blockList = xmlDoc.getElementsByTagNameNS('*', 'CorrectingDataBlock');
for (let i = 0; i < blockList.length; i++) {
Array.from(blockList[i].children).forEach(c => console.log(c.textContent));
}
我将 processElement()
方法更改为:
public processElement(element: Element, dataXPath: string, numberOfDataElementsOfSameType: number = 1, elementDataIndex: number = 0): void {
const typeName: string = this._getElementTypeName(element);
const typeElement: Element = this._getTypeElementByName(typeName);
if (numberOfDataElementsOfSameType > 1) {
dataXPath += `/*[local-name()='${element.getAttribute("name")}'][${elementDataIndex + 1}]`;
} else {
dataXPath += `/*[local-name()='${element.getAttribute("name")}']`;
}
if (typeElement && this._isComplexType(typeElement)) {
const sequenceElement: Element = this._schemeDocument.evaluate("./*[local-name()='sequence']", typeElement).iterateNext() as Element;
if (sequenceElement) {
Array.prototype.forEach.call(sequenceElement.children, (childElement: Element) => {
const childDataXPath: string = dataXPath + `/*[local-name()='${childElement.getAttribute("name")}']`;
const childResult: XPathResult = this._dataDocument.evaluate(childDataXPath, this._dataDocument, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
const childTypeName: string = this._getElementTypeName(childElement);
const childTypeElement: Element = this._getTypeElementByName(childTypeName);
if (childTypeElement && this._isComplexType(childTypeElement)) {
for (let i: number = 0; i < childResult.snapshotLength; i++) {
if (childResult.snapshotLength > 1) {
this.processElement(childElement, dataXPath, childResult.snapshotLength, i);
} else {
this.processElement(childElement, dataXPath, 1, 0);
}
}
} else {
const childElementCaption: string = this._getElementCaption(childElement);
for (let i: number = 0; i < childResult.snapshotLength; i++) {
const childDataElement: Element = childResult.snapshotItem(i) as Element;
const childDataElementValue: string = childDataElement ? childDataElement.textContent : "EMPTY";
console.log(childElementCaption + ": " + childDataElementValue);
}
}
});
}
}
}
主要变化是 XPath 现在包含 XML 文档中元素的索引。这有助于避免重复的叶元素。在具有多个 tns:CorrectingDataBlock
元素的示例中,新的 XPath 如下所示:
/*[local-name()='request']/*[local-name()='CorrectingData']/*[local-name()='CorrectingDataBlock'][1]/*[local-name()='CurrentVersionData']