将数据框转换为 XML

Convert Dataframe to XML

我正在尝试将一个数据框转换为 XML,其中大约有 60 万条记录。我使用的是 XML 包:

library(XML)

# Build an in-memory XML tree whose top-level tag is "mydata" and append one
# <person> element per row of `mydata`, passing that row as the attributes.
con <- xmlOutputDOM("mydata")
# seq_len() yields an empty sequence for a zero-row data frame, whereas
# seq(0) evaluates to c(1, 0) and would run the loop body twice.
for (i in seq_len(nrow(mydata))) {
  con$addTag("person", attrs = mydata[i, ])
}

上面的代码运行时间太长。有没有办法重写这段代码,或者使用其他包来提高性能?

可以尝试用 lapply 代替 for 循环,也许能解决这个问题。

library('XML')

数据

# Example data: an integer column `a` and a character column `b`, 7 rows.
df1 <- data.frame(
  a = seq_len(7),
  b = letters[seq_len(7)],
  stringsAsFactors = FALSE
)

代码

# create a new xml doc
doc_xml <- newXMLDoc(isHTML = FALSE)

# create a table node inside the document
table_node <- newXMLNode("table", doc = doc_xml)

# Convert each column to character on its own. apply() on a data frame goes
# through as.matrix(), which format()s non-character columns to a common
# width, so numbers such as 1 and 10 would be written as " 1" and "10".
col_names <- names(df1)
col_text <- lapply(df1, as.character)

# row data: one <row> node per record, with one child node per column
row_data <- lapply(seq_len(nrow(df1)), function(i) {
  cells <- lapply(seq_along(col_names), function(j) {
    newXMLNode(col_names[j], col_text[[j]][i])
  })
  newXMLNode("row", .children = cells)
})

# add row data to table node; invisible() keeps the (possibly very large)
# tree from being auto-printed at top level
invisible(addChildren(table_node, kids = row_data))

# save as xml file
saveXML(doc_xml, file = "df1.xml")

输出

# Auto-print the document to inspect the generated XML; the commented lines
# below show the output for the 7-row example data.
doc_xml
# <?xml version="1.0"?>
# <table>
#   <row>
#     <a>1</a>
#     <b>a</b>
#   </row>
#   <row>
#     <a>2</a>
#     <b>b</b>
#   </row>
#   <row>
#     <a>3</a>
#     <b>c</b>
#   </row>
#   <row>
#     <a>4</a>
#     <b>d</b>
#   </row>
#   <row>
#     <a>5</a>
#     <b>e</b>
#   </row>
#   <row>
#     <a>6</a>
#     <b>f</b>
#   </row>
#   <row>
#     <a>7</a>
#     <b>g</b>
#   </row>
# </table>

验证节点

# Select every <a> node, then every <b> node, anywhere in the document.
getNodeSet(doc = doc_xml, path = "//a")
getNodeSet(doc = doc_xml, path = "//b")

将 XML 转换回数据框

# Option 1: pass the row nodes selected with an XPath expression
xmlToDataFrame(
  nodes = getNodeSet(doc = doc_xml, path = "//table/*"),
  stringsAsFactors = FALSE,
  colClasses = c("integer", "character")
)
# Option 2: pass the in-memory XML document object
xmlToDataFrame(
  doc = doc_xml,
  stringsAsFactors = FALSE,
  colClasses = c("integer", "character")
)
# Option 3: pass the path of the saved XML file
xmlToDataFrame(
  doc = "df1.xml",
  stringsAsFactors = FALSE,
  colClasses = c("integer", "character")
)
#   a b
# 1 1 a
# 2 2 b
# 3 3 c
# 4 4 d
# 5 5 e
# 6 6 f
# 7 7 g