使用 XPath 遍历 XML 文档而不打印重复项

Iterating through XML document with XPath without printing duplicates

我有这个 XML 文档:

<?xml version="1.0" encoding="UTF-8"?>
<tns:request xmlns:tns="urn">
    <tns:CorrectingData>
        <tns:CorrectingDataBlock>
            <tns:CurrentVersionData>current</tns:CurrentVersionData>
            <tns:NewVersionData>new</tns:NewVersionData>
        </tns:CorrectingDataBlock>
        <tns:CorrectingDataBlock>
            <tns:CurrentVersionData>100</tns:CurrentVersionData>
            <tns:NewVersionData>200</tns:NewVersionData>
        </tns:CorrectingDataBlock>
    </tns:CorrectingData>
</tns:request>

和对应的XSD文档:

<?xml version="1.0" encoding="UTF-8"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">

<xsd:element name="request" type="tns:requestType">
</xsd:element>

<xsd:complexType name="requestType">
    <xsd:sequence>
        <xsd:element name="CorrectingData" type="tns:CorrectingDataType" minOccurs="0" maxOccurs="1">
        </xsd:element>
    </xsd:sequence>
</xsd:complexType>

<xsd:complexType name="CorrectingDataType">
    <xsd:sequence>
        <xsd:element name="CorrectingDataBlock" type="tns:CorrectingDataTextType" minOccurs="1" maxOccurs="unbounded"/>
    </xsd:sequence>
</xsd:complexType>

<xsd:complexType name="CorrectingDataTextType">
    <xsd:sequence>
        <xsd:element name="CurrentVersionData" type="tns:string">
        </xsd:element>
        <xsd:element name="NewVersionData" type="tns:string">
        </xsd:element>
    </xsd:sequence>
</xsd:complexType>

</xsd:schema>

我需要在编辑表单中显示整个 XSD 文档,并尽可能插入 XML 文档中的数据。 在我的示例中,我简化了代码,以便它将叶项的值输出到控制台。

为了从 XML 文档中插入数据,我想遍历每个 tns:CorrectingDataBlock 元素并打印其叶子节点的值(tns:CurrentVersionData 和 tns:NewVersionData)。 我需要这样的输出:

current
new
100
200

我有这个 javascript 代码,它遍历 XSD 文档并创建 XPath,所以我可以用它从 XML 文档中查找和打印叶子的值。

// ...
// dataXPath is filled in by a recursive method: starting from the root, each level appends one step of the form
//   /*[local-name()='${childElement.getAttribute("name")}']
// and passes the result down to its children, all the way to the leaf.
// Once a leaf node is reached, its value is printed.

// Path of the parent plus one step that selects the schema child by local name (any namespace prefix).
const childElementDataXPath: string = `${dataXPath}/*[local-name()='${childElement.getAttribute("name")}']`;
// Evaluate from the document root and take an ordered snapshot of every matching data node.
const snapshotXPathResult: XPathResult = this._dataDocument.evaluate(
  childElementDataXPath,
  this._dataDocument,
  null,
  XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
  null,
);

// Print the text content of each match, in snapshot order.
let matchIndex = 0;
while (matchIndex < snapshotXPathResult.snapshotLength) {
  const matchedNode = snapshotXPathResult.snapshotItem(matchIndex) as Element;
  console.log(matchedNode.textContent);
  matchIndex += 1;
}
// ...

此代码创建的结果 XPath 是:

/*[local-name()='request']/*[local-name()='CorrectingData']/*[local-name()='CorrectingDataBlock']/*[local-name()='CurrentVersionData']

/*[local-name()='request']/*[local-name()='CorrectingData']/*[local-name()='CorrectingDataBlock']/*[local-name()='NewVersionData']

并且代码生成此输出:

current
100
new
200
current
100
new
200

问题:如何更改我的代码以获得我想要的?我做错了什么?

备注:

完整代码清单:

/**
 * Walks an XSD schema document and, for every schema element, looks up the
 * matching nodes in an XML data document by XPath, logging leaf values to
 * the console as `<caption>: <value>`.
 *
 * Both `_schemeDocument` and `_dataDocument` must be assigned before
 * `processElement` is called.
 */
export class Parser {

    /** Parsed XSD schema that drives the traversal. */
    public _schemeDocument!: Document;
    /** Parsed XML data document the generated XPath expressions are evaluated against. */
    public _dataDocument!: Document;

    /**
     * Processes one schema element and, recursively, the elements declared
     * in the `sequence` of its named complex type.
     *
     * @param element - Schema (`xsd:element`) node to process.
     * @param dataXPath - XPath of the parent data node; a step selecting
     *   `element` by local name is appended to it.
     */
    public processElement(element: Element, dataXPath: string): void {
        const typeElement = this._getTypeElementByName(this._getElementTypeName(element));

        // Match by local name so the data document's namespace prefix does not matter.
        const elementDataXPath = dataXPath + `/*[local-name()='${element.getAttribute("name")}']`;

        // Only elements with a named complex type have children to descend into.
        if (!typeElement || !this._isComplexType(typeElement)) {
            return;
        }

        const sequence = this._schemeDocument.evaluate("./*[local-name()='sequence']", typeElement).iterateNext() as Element | null;
        if (!sequence) {
            return;
        }

        for (const childElement of Array.from(sequence.children)) {
            // A sequence may hold non-element children; only element declarations are processed.
            if (!this._isElement(childElement)) {
                continue;
            }

            const childElementDataXPath = elementDataXPath + `/*[local-name()='${childElement.getAttribute("name")}']`;
            const matches = this._dataDocument.evaluate(childElementDataXPath, this._dataDocument, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);

            const childTypeElement = this._getTypeElementByName(this._getElementTypeName(childElement));
            if (childTypeElement && this._isComplexType(childTypeElement)) {
                // One recursive call per matching data node. Every call receives the same
                // un-indexed path, so each call's XPath matches all sibling data nodes again;
                // with N repeated data nodes the leaves below them are printed N times.
                for (let i = 0; i < matches.snapshotLength; i++) {
                    this.processElement(childElement, elementDataXPath);
                }
            } else {
                const childElementCaption = this._getElementCaption(childElement);

                for (let i = 0; i < matches.snapshotLength; i++) {
                    const childElementValue = matches.snapshotItem(i)?.textContent ?? "EMPTY";
                    console.log(childElementCaption + ": " + childElementValue);
                }
            }
        }
    }

    /**
     * Returns the value of the element's `type` attribute without its
     * namespace prefix (`tns:Foo` becomes `Foo`).
     *
     * @returns The unprefixed type name, or an empty string when the element
     *   has no `type` attribute (previously this case threw a TypeError).
     */
    private _getElementTypeName(element: Element): string {
        const qualifiedTypeName = element.getAttribute("type");
        if (qualifiedTypeName === null) {
            return "";
        }
        const splittedTypeName = qualifiedTypeName.split(":");
        return splittedTypeName.length > 1 ? splittedTypeName[1] : splittedTypeName[0];
    }

    /**
     * Finds the `simpleType` or `complexType` definition with the given name
     * anywhere in the schema document.
     *
     * @returns The first matching definition, or `null` when the name is
     *   empty or no such definition exists.
     */
    private _getTypeElementByName(typeName: string): Element | null {
        // An empty name means the element declared no type; there is nothing to look up.
        if (typeName === "") {
            return null;
        }
        const simpleTypeXPath = `//*[local-name()='simpleType'][@name='${typeName}']`;
        const complexTypeXPath = `//*[local-name()='complexType'][@name='${typeName}']`;
        return this._schemeDocument.evaluate(`${simpleTypeXPath}|${complexTypeXPath}`, this._schemeDocument).iterateNext() as Element | null;
    }

    /**
     * Returns the text of the first `documentation` node found below the
     * schema element, or `"EMPTY"` when there is none.
     */
    private _getElementCaption(element: Element): string {
        const elementCaption = this._schemeDocument.evaluate(".//*[local-name()='documentation']", element).iterateNext();
        return elementCaption?.textContent ?? "EMPTY";
    }

    /** Tells whether the schema node is a `complexType` definition. */
    private _isComplexType(element: Element): boolean {
        return element.localName === "complexType";
    }

    /** Tells whether the schema node is an `element` declaration. */
    private _isElement(element: Element): boolean {
        return element.localName === "element";
    }
}

为什么不直接使用 DOM 方法:

// Sample data document: two CorrectingDataBlock elements, each with two leaf children.
const xmlSource = `<?xml version="1.0" encoding="UTF-8"?>
<tns:request xmlns:tns="urn">
    <tns:CorrectingData>
        <tns:CorrectingDataBlock>
            <tns:CurrentVersionData>current</tns:CurrentVersionData>
            <tns:NewVersionData>new</tns:NewVersionData>
        </tns:CorrectingDataBlock>
        <tns:CorrectingDataBlock>
            <tns:CurrentVersionData>100</tns:CurrentVersionData>
            <tns:NewVersionData>200</tns:NewVersionData>
        </tns:CorrectingDataBlock>
    </tns:CorrectingData>
</tns:request>`;

// Parse the string into an XML DOM document.
const xmlDoc = new DOMParser().parseFromString(xmlSource, 'application/xml');

// '*' matches any namespace, so the blocks are found regardless of the prefix bound to them.
const blockList = xmlDoc.getElementsByTagNameNS('*', 'CorrectingDataBlock');

// Visit the blocks in collection order and log the text of each child element.
for (const block of Array.from(blockList)) {
  for (const leaf of Array.from(block.children)) {
    console.log(leaf.textContent);
  }
}

我将 processElement() 方法更改为:

/**
 * Processes one schema element and, recursively, the elements declared in
 * the `sequence` of its complex type, logging each leaf value found in the
 * data document as `<caption>: <value>`.
 *
 * When several data nodes match the same schema element, each recursive call
 * is pinned to a single one of them with a positional XPath predicate, so the
 * leaves of every repeated node are printed exactly once.
 *
 * @param element - Schema (`xsd:element`) node to process.
 * @param dataXPath - XPath of the parent data node; the step for `element`
 *   is appended to it.
 * @param numberOfDataElementsOfSameType - Number of data nodes that matched
 *   `element` under the parent; a positional predicate is added only when
 *   this is greater than 1.
 * @param elementDataIndex - Zero-based index, among those matches, of the
 *   data node this call is responsible for.
 */
public processElement(element: Element, dataXPath: string, numberOfDataElementsOfSameType: number = 1, elementDataIndex: number = 0): void {
    const typeName: string = this._getElementTypeName(element);
    const typeElement = this._getTypeElementByName(typeName);

    let elementDataXPath: string = dataXPath + `/*[local-name()='${element.getAttribute("name")}']`;
    if (numberOfDataElementsOfSameType > 1) {
        // XPath positions are 1-based, hence the +1 on the zero-based index.
        elementDataXPath += `[${elementDataIndex + 1}]`;
    }

    if (typeElement && this._isComplexType(typeElement)) {
        const sequenceElement = this._schemeDocument.evaluate("./*[local-name()='sequence']", typeElement).iterateNext() as Element | null;
        if (sequenceElement) {
            for (const childElement of Array.from(sequenceElement.children)) {
                // A sequence may also hold non-element children (e.g. annotation); those carry
                // no name/type attributes and must be skipped.
                if (!this._isElement(childElement)) {
                    continue;
                }

                const childDataXPath: string = elementDataXPath + `/*[local-name()='${childElement.getAttribute("name")}']`;
                const childResult: XPathResult = this._dataDocument.evaluate(childDataXPath, this._dataDocument, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
                const childTypeName: string = this._getElementTypeName(childElement);
                const childTypeElement = this._getTypeElementByName(childTypeName);
                if (childTypeElement && this._isComplexType(childTypeElement)) {
                    // One call per matching data node; passing the match count and index lets the
                    // callee address exactly that node. With a single match this is (1, 0).
                    for (let i: number = 0; i < childResult.snapshotLength; i++) {
                        this.processElement(childElement, elementDataXPath, childResult.snapshotLength, i);
                    }
                } else {
                    const childElementCaption: string = this._getElementCaption(childElement);
                    for (let i: number = 0; i < childResult.snapshotLength; i++) {
                        const childDataElementValue: string = childResult.snapshotItem(i)?.textContent ?? "EMPTY";
                        console.log(childElementCaption + ": " + childDataElementValue);
                    }
                }
            }
        }
    }
}

主要变化是 XPath 现在包含 XML 文档中元素的索引。这有助于避免重复的叶元素。在具有多个 tns:CorrectingDataBlock 元素的示例中,新的 XPath 如下所示:

/*[local-name()='request']/*[local-name()='CorrectingData']/*[local-name()='CorrectingDataBlock'][1]/*[local-name()='CurrentVersionData']