递归提取 XML 属性
Recursively Extract XML Attribute
我有一个 XML 文档:
# Load xml2 and parse a small nested <Member> document into `ex`.
library("xml2")
xml_ex <- '
<Member name="ONE">
<Member name="A"/>
<Member name="B">
<Member name="1"/>
<Member name="2"/>
</Member>
<Member name="C"/>
</Member>'
ex <- read_xml(xml_ex)
如何在保留层次结构的同时,从每个 Member 节点中提取 name 属性?期望的输出例如:
# Desired result: a nested list keyed by each node's `name` attribute,
# with "" standing in for every node that has no children.
list(
  ONE = list(
    A = "",
    B = list(`1` = "", `2` = ""),
    C = ""
  )
)
## $ONE
## $ONE$A
## [1] ""
##
## $ONE$B
## $ONE$B$`1`
## [1] ""
##
## $ONE$B$`2`
## [1] ""
##
## $ONE$C
## [1] ""
编辑:更改目标输出
我找到了下面的解决方案,该解决方案有效但看起来很笨拙。
#' Pick individual children of an XML node, one lookup per `search` element
#'
#' @param x The xml2 node whose children are looked up.
#' @param search A vector whose elements are handed, one at a time, to
#'   `xml2::xml_child()` as its `search` argument (child positions in the
#'   way this file uses it).
#' @return A list with one entry per element of `search`, each holding the
#'   value returned by `xml2::xml_child()` for that element.
takeTheChildren <- function(x, search) {
  pick_child <- xml2::xml_child
  lapply(search, function(position) pick_child(x = x, position))
}
#' Build a nested named list mirroring a hierarchy of XML nodes
#'
#' Every node contributes one list element named after its `name` attribute.
#' A node with children maps to the recursively built list of those children;
#' a node without children maps to the empty string `""`.
#'
#' @param nodes A list of xml2 nodes. A single `xml_node` (which includes a
#'   parsed `xml_document`) is also accepted and is wrapped in a list, so the
#'   caller no longer has to write `list(doc)`.
#' @return A named list reflecting the node hierarchy, or `""` when `nodes`
#'   is empty.
hierBuilder <- function(nodes) {
  if (!requireNamespace("xml2", quietly = TRUE)) {
    stop(
      "`xml2` needed for this function to work. Please install it.",
      call. = FALSE
    )
  }
  # Accept a bare node/document as well as the original list-of-nodes form.
  if (inherits(nodes, "xml_node")) {
    nodes <- list(nodes)
  }
  # An empty set of nodes means the parent was a leaf: return an empty string.
  if (length(nodes) == 0L) {
    return("")
  }
  # Name each current-level node after its `name` attribute. vapply() pins the
  # result to exactly one string per node, unlike sapply(), whose return type
  # depends on its input.
  names(nodes) <- vapply(nodes, xml2::xml_attr, character(1), attr = "name")
  # Index sequence over each node's children; seq_along() gives integer(0)
  # for a childless node, so leaves recurse on an empty list.
  seq_ix <- lapply(nodes, function(node) {
    seq_along(xml2::xml_children(node))
  })
  # List the individual child nodes under each current-level node, keeping
  # the per-parent grouping (and the parents' names) intact.
  children <- mapply(
    takeTheChildren,
    x = nodes,
    search = seq_ix,
    SIMPLIFY = FALSE
  )
  # Recurse into each parent's children.
  lapply(children, hierBuilder)
}
一个烦人的限制是:为了让递归正常工作,必须把初始的 xml_doc 或 xml_nodeset 包装成列表后再传入:
# Wrap the parsed document in a list: hierBuilder() iterates over its argument
# and treats each element as one node.
hierBuilder(list(ex))
## $ONE
## $ONE$A
## [1] ""
##
## $ONE$B
## $ONE$B$`1`
## [1] ""
##
## $ONE$B$`2`
## [1] ""
##
## $ONE$C
## [1] ""
我有一个 XML 文档:
# Load xml2 and parse a small nested <Member> document into `ex`.
library("xml2")
xml_ex <- '
<Member name="ONE">
<Member name="A"/>
<Member name="B">
<Member name="1"/>
<Member name="2"/>
</Member>
<Member name="C"/>
</Member>'
ex <- read_xml(xml_ex)
如何在保留层次结构的同时,从每个 Member 节点中提取 name 属性?期望的输出例如:
# Desired result: a nested list keyed by each node's `name` attribute,
# with "" standing in for every node that has no children.
list(
  ONE = list(
    A = "",
    B = list(`1` = "", `2` = ""),
    C = ""
  )
)
## $ONE
## $ONE$A
## [1] ""
##
## $ONE$B
## $ONE$B$`1`
## [1] ""
##
## $ONE$B$`2`
## [1] ""
##
## $ONE$C
## [1] ""
编辑:更改目标输出
我找到了下面的解决方案,该解决方案有效但看起来很笨拙。
#' Pick individual children of an XML node, one lookup per `search` element
#'
#' @param x The xml2 node whose children are looked up.
#' @param search A vector whose elements are handed, one at a time, to
#'   `xml2::xml_child()` as its `search` argument (child positions in the
#'   way this file uses it).
#' @return A list with one entry per element of `search`, each holding the
#'   value returned by `xml2::xml_child()` for that element.
takeTheChildren <- function(x, search) {
  pick_child <- xml2::xml_child
  lapply(search, function(position) pick_child(x = x, position))
}
#' Build a nested named list mirroring a hierarchy of XML nodes
#'
#' Every node contributes one list element named after its `name` attribute.
#' A node with children maps to the recursively built list of those children;
#' a node without children maps to the empty string `""`.
#'
#' @param nodes A list of xml2 nodes. A single `xml_node` (which includes a
#'   parsed `xml_document`) is also accepted and is wrapped in a list, so the
#'   caller no longer has to write `list(doc)`.
#' @return A named list reflecting the node hierarchy, or `""` when `nodes`
#'   is empty.
hierBuilder <- function(nodes) {
  if (!requireNamespace("xml2", quietly = TRUE)) {
    stop(
      "`xml2` needed for this function to work. Please install it.",
      call. = FALSE
    )
  }
  # Accept a bare node/document as well as the original list-of-nodes form.
  if (inherits(nodes, "xml_node")) {
    nodes <- list(nodes)
  }
  # An empty set of nodes means the parent was a leaf: return an empty string.
  if (length(nodes) == 0L) {
    return("")
  }
  # Name each current-level node after its `name` attribute. vapply() pins the
  # result to exactly one string per node, unlike sapply(), whose return type
  # depends on its input.
  names(nodes) <- vapply(nodes, xml2::xml_attr, character(1), attr = "name")
  # Index sequence over each node's children; seq_along() gives integer(0)
  # for a childless node, so leaves recurse on an empty list.
  seq_ix <- lapply(nodes, function(node) {
    seq_along(xml2::xml_children(node))
  })
  # List the individual child nodes under each current-level node, keeping
  # the per-parent grouping (and the parents' names) intact.
  children <- mapply(
    takeTheChildren,
    x = nodes,
    search = seq_ix,
    SIMPLIFY = FALSE
  )
  # Recurse into each parent's children.
  lapply(children, hierBuilder)
}
一个烦人的限制是:为了让递归正常工作,必须把初始的 xml_doc 或 xml_nodeset 包装成列表后再传入:
# Wrap the parsed document in a list: hierBuilder() iterates over its argument
# and treats each element as one node.
hierBuilder(list(ex))
## $ONE
## $ONE$A
## [1] ""
##
## $ONE$B
## $ONE$B$`1`
## [1] ""
##
## $ONE$B$`2`
## [1] ""
##
## $ONE$C
## [1] ""