XML 文档的索引路径
Index path for XML document
我有一个索引 Xpath 的算法,这样我就可以通过为 xpath 节点分配唯一索引来处理具有相同 Xpath 但不同值的多个文件。
示例:
文件 1:
<Return>
<ReturnData>
<Person>
<Name>Yohanna</Name>
</Person>
</ReturnData>
</Return>
文件 2:
<Return>
<ReturnData>
<Person>
<Name>Jacoub</Name>
</Person>
</ReturnData>
</Return>
期望输出:
1. /Return/ReturnData/Person[1]/Name=Yohanna
2. /Return/ReturnData/Person[2]/Name=Jacoub
我当前的实现输出为:
1. /Return/ReturnData[1]/Person[1]/Name[1]=Yohanna
2. /Return/ReturnData[1]/Person[2]/Name[1]=Jacoub
我想删除 [1],因为不需要它们来表示此节点仅出现 1 次。
索引代码:
public String getFullXPath(Node n) {
if (null == n) return null;
Node parent;
Stack<Node> hierarchy = new Stack<Node>();
StringBuilder builder = new StringBuilder();
hierarchy.push(n);
switch (n.getNodeType()) {
case Node.ATTRIBUTE_NODE:
parent = ((Attr) n).getOwnerElement();
break;
case Node.ELEMENT_NODE:
parent = n.getParentNode();
break;
case Node.DOCUMENT_NODE:
parent = n.getParentNode();
break;
default:
throw new IllegalStateException("Unexpected Node type" + n.getNodeType());
}
while (null != parent
&& parent.getNodeType() != Node.DOCUMENT_NODE
&& !parent.getNodeName().equals("section")) {
hierarchy.push(parent);
parent = parent.getParentNode();
}
Object obj;
while (!hierarchy.isEmpty() && null != (obj = hierarchy.pop())) {
Node node = (Node) obj;
if (node.getNodeType() == Node.ELEMENT_NODE) {
builder.append("/").append(node.getNodeName());
int prev_siblings = 1;
Node prev_sibling = node.getPreviousSibling();
while (null != prev_sibling) {
if (prev_sibling.getNodeType() == node.getNodeType()) {
if (prev_sibling.getNodeName().equalsIgnoreCase(node.getNodeName())) {
prev_siblings++;
}
}
prev_sibling = prev_sibling.getPreviousSibling();
}
// Here is where I say don't append the number of prev_siblings if it equals 1 or the next sibling does not exist
if(prev_siblings == 1 && node.getNextSibling() == null) {
}
else
builder.append("[").append(prev_siblings).append("]");
}
else if (node.getNodeType() == Node.ATTRIBUTE_NODE) {
builder.append("/@");
builder.append(node.getNodeName());
}
}
return builder.toString();
}
我已经尝试修复此问题,但经过 3 天的调查和调试后我仍然无法修复...不知道...我知道我遗漏了一些东西,一些我没有看到的东西。任何帮助或协助将不胜感激。
编辑:
添加了 2 个辅助方法:
private static boolean hasNextElementsWithName(Node node) {
while (null != node) {
// checks if next sibling exists
if(node.getNextSibling().hasAttributes()) {
return true;
}
}
return false;
}
private static int countPrevElementsWithName(Node node, int prev_siblings,
Node prev_sibling) {
while (null != prev_sibling) {
if (prev_sibling.getNodeType() == node.getNodeType()) {
if (prev_sibling.getNodeName().equalsIgnoreCase(node.getNodeName())) {
prev_siblings++;
}
}
prev_sibling = prev_sibling.getPreviousSibling();
}
return prev_siblings;
}
调用方式:
Object obj;
while (!hierarchy.isEmpty() && null != (obj = hierarchy.pop())) {
Node node = (Node) obj;
if (node.getNodeType() == Node.ELEMENT_NODE) {
builder.append("/").append(node.getNodeName());
int prev_siblings = 1;
Node prev_sibling = node.getPreviousSibling();
prev_siblings = countPrevElementsWithName(node, prev_siblings,
prev_sibling);
//@Andreas
int count = countPrevElementsWithName(node, prev_siblings, prev_sibling);
if(count != 0 || hasNextElementsWithName(node)) {
builder.append("[").append(count+1).append("]");
}
}
else if (node.getNodeType() == Node.ATTRIBUTE_NODE) {
builder.append("/@");
builder.append(node.getNodeName());
}
}
我现在不知道如何使用它们?
在 Java/DOM 中这样做很痛苦,所以这是一个 XSLT 解决方案:
<xsl:template match="/" mode="path">
<xsl:text>/</xsl:text>
</xsl:template>
<xsl:template match="*" mode="path">
<xsl:apply-templates select=".." mode="path"/>
<xsl:text>/</xsl:text>
<xsl:value-of select="name()"/>
<xsl:text>[</xsl:text>
<xsl:number/>
<xsl:text>]</xsl:text>
</xsl:template>
<xsl:template match="*[count(../*)=1]" mode="path">
<xsl:apply-templates select=".." mode="path"/>
<xsl:text>/</xsl:text>
<xsl:value-of select="name()"/>
</xsl:template>
<xsl:template match="/">
<xsl:for-each select="//Name">
<xsl:apply-templates select="." mode="path"/>
<xsl:text>=</xsl:text>
<xsl:value-of select="."/>
<xsl:text>
</xsl:text>
</xsl:for-each>
</xsl:template>
<xsl:strip-space elements="*"/>
<xsl:output method="text"/>
代码存根
int count = countPrevElementsWithSameName(node);
if (count != 0 || hasNextElementWithSameName(node))
builder.append("[").append(count + 1).append("]");
辅助方法
private static final boolean hasNextElementWithSameName(Node node) {
String name = node.getNodeName();
for (Node next = node.getNextSibling(); next != null; next = next.getNextSibling())
if (next.getNodeType() == Node.ELEMENT_NODE) // only look at elements
return next.getNodeName().equals(name); // stop on first element after "node"
return false;
}
private static final int countPrevElementsWithSameName(Node node) {
String name = node.getNodeName();
int count = 0;
for (Node prev = node.getPreviousSibling(); prev != null; prev = prev.getPreviousSibling())
if (prev.getNodeType() == Node.ELEMENT_NODE) { // only look at elements
if (! prev.getNodeName().equals(name))
break; // stop when element name changes
count++; // count elements of same name as "node"
}
return count;
}
我有一个索引 Xpath 的算法,这样我就可以通过为 xpath 节点分配唯一索引来处理具有相同 Xpath 但不同值的多个文件。
示例:
文件 1:
<Return>
<ReturnData>
<Person>
<Name>Yohanna</Name>
</Person>
</ReturnData>
</Return>
文件 2:
<Return>
<ReturnData>
<Person>
<Name>Jacoub</Name>
</Person>
</ReturnData>
</Return>
期望输出:
1. /Return/ReturnData/Person[1]/Name=Yohanna
2. /Return/ReturnData/Person[2]/Name=Jacoub
我当前的实现输出为:
1. /Return/ReturnData[1]/Person[1]/Name[1]=Yohanna
2. /Return/ReturnData[1]/Person[2]/Name[1]=Jacoub
我想删除 [1],因为不需要它们来表示此节点仅出现 1 次。
索引代码:
public String getFullXPath(Node n) {
if (null == n) return null;
Node parent;
Stack<Node> hierarchy = new Stack<Node>();
StringBuilder builder = new StringBuilder();
hierarchy.push(n);
switch (n.getNodeType()) {
case Node.ATTRIBUTE_NODE:
parent = ((Attr) n).getOwnerElement();
break;
case Node.ELEMENT_NODE:
parent = n.getParentNode();
break;
case Node.DOCUMENT_NODE:
parent = n.getParentNode();
break;
default:
throw new IllegalStateException("Unexpected Node type" + n.getNodeType());
}
while (null != parent
&& parent.getNodeType() != Node.DOCUMENT_NODE
&& !parent.getNodeName().equals("section")) {
hierarchy.push(parent);
parent = parent.getParentNode();
}
Object obj;
while (!hierarchy.isEmpty() && null != (obj = hierarchy.pop())) {
Node node = (Node) obj;
if (node.getNodeType() == Node.ELEMENT_NODE) {
builder.append("/").append(node.getNodeName());
int prev_siblings = 1;
Node prev_sibling = node.getPreviousSibling();
while (null != prev_sibling) {
if (prev_sibling.getNodeType() == node.getNodeType()) {
if (prev_sibling.getNodeName().equalsIgnoreCase(node.getNodeName())) {
prev_siblings++;
}
}
prev_sibling = prev_sibling.getPreviousSibling();
}
// Here is where I say don't append the number of prev_siblings if it equals 1 or the next sibling does not exist
if(prev_siblings == 1 && node.getNextSibling() == null) {
}
else
builder.append("[").append(prev_siblings).append("]");
}
else if (node.getNodeType() == Node.ATTRIBUTE_NODE) {
builder.append("/@");
builder.append(node.getNodeName());
}
}
return builder.toString();
}
我已经尝试修复此问题,但经过 3 天的调查和调试后我仍然无法修复...不知道...我知道我遗漏了一些东西,一些我没有看到的东西。任何帮助或协助将不胜感激。
编辑:
添加了 2 个辅助方法:
private static boolean hasNextElementsWithName(Node node) {
while (null != node) {
// checks if next sibling exists
if(node.getNextSibling().hasAttributes()) {
return true;
}
}
return false;
}
private static int countPrevElementsWithName(Node node, int prev_siblings,
Node prev_sibling) {
while (null != prev_sibling) {
if (prev_sibling.getNodeType() == node.getNodeType()) {
if (prev_sibling.getNodeName().equalsIgnoreCase(node.getNodeName())) {
prev_siblings++;
}
}
prev_sibling = prev_sibling.getPreviousSibling();
}
return prev_siblings;
}
调用方式:
Object obj;
while (!hierarchy.isEmpty() && null != (obj = hierarchy.pop())) {
Node node = (Node) obj;
if (node.getNodeType() == Node.ELEMENT_NODE) {
builder.append("/").append(node.getNodeName());
int prev_siblings = 1;
Node prev_sibling = node.getPreviousSibling();
prev_siblings = countPrevElementsWithName(node, prev_siblings,
prev_sibling);
//@Andreas
int count = countPrevElementsWithName(node, prev_siblings, prev_sibling);
if(count != 0 || hasNextElementsWithName(node)) {
builder.append("[").append(count+1).append("]");
}
}
else if (node.getNodeType() == Node.ATTRIBUTE_NODE) {
builder.append("/@");
builder.append(node.getNodeName());
}
}
我现在不知道如何使用它们?
在 Java/DOM 中这样做很痛苦,所以这是一个 XSLT 解决方案:
<xsl:template match="/" mode="path">
<xsl:text>/</xsl:text>
</xsl:template>
<xsl:template match="*" mode="path">
<xsl:apply-templates select=".." mode="path"/>
<xsl:text>/</xsl:text>
<xsl:value-of select="name()"/>
<xsl:text>[</xsl:text>
<xsl:number/>
<xsl:text>]</xsl:text>
</xsl:template>
<xsl:template match="*[count(../*)=1]" mode="path">
<xsl:apply-templates select=".." mode="path"/>
<xsl:text>/</xsl:text>
<xsl:value-of select="name()"/>
</xsl:template>
<xsl:template match="/">
<xsl:for-each select="//Name">
<xsl:apply-templates select="." mode="path"/>
<xsl:text>=</xsl:text>
<xsl:value-of select="."/>
<xsl:text>
</xsl:text>
</xsl:for-each>
</xsl:template>
<xsl:strip-space elements="*"/>
<xsl:output method="text"/>
代码存根
int count = countPrevElementsWithSameName(node);
if (count != 0 || hasNextElementWithSameName(node))
builder.append("[").append(count + 1).append("]");
辅助方法
private static final boolean hasNextElementWithSameName(Node node) {
String name = node.getNodeName();
for (Node next = node.getNextSibling(); next != null; next = next.getNextSibling())
if (next.getNodeType() == Node.ELEMENT_NODE) // only look at elements
return next.getNodeName().equals(name); // stop on first element after "node"
return false;
}
private static final int countPrevElementsWithSameName(Node node) {
String name = node.getNodeName();
int count = 0;
for (Node prev = node.getPreviousSibling(); prev != null; prev = prev.getPreviousSibling())
if (prev.getNodeType() == Node.ELEMENT_NODE) { // only look at elements
if (! prev.getNodeName().equals(name))
break; // stop when element name changes
count++; // count elements of same name as "node"
}
return count;
}