将数据框转换为 XML
Convert Dataframe to XML
我正在尝试将一个数据框(data frame)转换为 XML,大约有 60 万条记录。我使用的是 XML 包:
library(XML)
# Build the XML document in memory, adding one <person> tag per record.
con <- xmlOutputDOM("mydata")
# seq_len() gives an empty sequence for a zero-row data frame, whereas
# seq(nrow(mydata)) would count 1, 0 and index rows that do not exist.
for (i in seq_len(nrow(mydata))) {
  # row i of the data frame is passed as the attributes of the tag
  con$addTag("person", attrs = mydata[i, ])
}
上面的代码运行时间太长,有没有办法重写这段代码,或者使用其他包来提高性能?
可以尝试用 lapply 代替 for 循环,也许能解决这个问题。
library('XML')
数据
# sample data: seven rows with an integer column and a character column
df1 <- data.frame(
  a = seq_len(7L),
  b = head(letters, 7L),
  stringsAsFactors = FALSE
)
代码
# create a new xml doc
doc_xml <- newXMLDoc(isHTML = FALSE)
# create the root <table> node inside the document
table_node <- newXMLNode("table", doc = doc_xml)
# Build one <row> node per record, with one child node per column.
# Rows are visited by index instead of apply(df1, 1, ...): apply() first
# coerces the data frame to a character matrix via format(), which pads
# numeric values with spaces to a common width (e.g. " 1" ... "10").
col_names <- names(df1)
row_data <- lapply(seq_len(nrow(df1)), function(i) {
  cells <- lapply(col_names, function(nm) {
    newXMLNode(nm, as.character(df1[[nm]][i]))
  })
  newXMLNode("row", .children = cells)
})
# attach all row nodes to the table node in a single call
invisible(addChildren(table_node, kids = row_data))
# save as xml file
saveXML(doc_xml, file = "df1.xml")
输出
# printing the document object shows the generated XML
doc_xml
# <?xml version="1.0"?>
# <table>
#   <row>
#     <a>1</a>
#     <b>a</b>
#   </row>
#   <row>
#     <a>2</a>
#     <b>b</b>
#   </row>
#   <row>
#     <a>3</a>
#     <b>c</b>
#   </row>
#   <row>
#     <a>4</a>
#     <b>d</b>
#   </row>
#   <row>
#     <a>5</a>
#     <b>e</b>
#   </row>
#   <row>
#     <a>6</a>
#     <b>f</b>
#   </row>
#   <row>
#     <a>7</a>
#     <b>g</b>
#   </row>
# </table>
验证节点
# list every <a> node found anywhere in the document
getNodeSet(doc = doc_xml, path = "//a")
# list every <b> node found anywhere in the document
getNodeSet(doc = doc_xml, path = "//b")
将 XML 转换回数据框
# rebuild the data frame from the child nodes of <table>, selected by XPath
xmlToDataFrame(
  nodes = getNodeSet(doc_xml, "//table/*"),
  stringsAsFactors = FALSE,
  colClasses = c("integer", "character")
)
# rebuild the data frame from the in-memory document object
xmlToDataFrame(
  doc = doc_xml,
  stringsAsFactors = FALSE,
  colClasses = c("integer", "character")
)
# rebuild the data frame from the saved XML file
xmlToDataFrame(
  doc = "df1.xml",
  stringsAsFactors = FALSE,
  colClasses = c("integer", "character")
)
#   a b
# 1 1 a
# 2 2 b
# 3 3 c
# 4 4 d
# 5 5 e
# 6 6 f
# 7 7 g
我正在尝试将一个数据框(data frame)转换为 XML,大约有 60 万条记录。我使用的是 XML 包:
library(XML)
# Build the XML document in memory, adding one <person> tag per record.
con <- xmlOutputDOM("mydata")
# seq_len() gives an empty sequence for a zero-row data frame, whereas
# seq(nrow(mydata)) would count 1, 0 and index rows that do not exist.
for (i in seq_len(nrow(mydata))) {
  # row i of the data frame is passed as the attributes of the tag
  con$addTag("person", attrs = mydata[i, ])
}
上面的代码运行时间太长,有没有办法重写这段代码,或者使用其他包来提高性能?
可以尝试用 lapply 代替 for 循环,也许能解决这个问题。
library('XML')
数据
# sample data: seven rows with an integer column and a character column
df1 <- data.frame(
  a = seq_len(7L),
  b = head(letters, 7L),
  stringsAsFactors = FALSE
)
代码
# create a new xml doc
doc_xml <- newXMLDoc(isHTML = FALSE)
# create the root <table> node inside the document
table_node <- newXMLNode("table", doc = doc_xml)
# Build one <row> node per record, with one child node per column.
# Rows are visited by index instead of apply(df1, 1, ...): apply() first
# coerces the data frame to a character matrix via format(), which pads
# numeric values with spaces to a common width (e.g. " 1" ... "10").
col_names <- names(df1)
row_data <- lapply(seq_len(nrow(df1)), function(i) {
  cells <- lapply(col_names, function(nm) {
    newXMLNode(nm, as.character(df1[[nm]][i]))
  })
  newXMLNode("row", .children = cells)
})
# attach all row nodes to the table node in a single call
invisible(addChildren(table_node, kids = row_data))
# save as xml file
saveXML(doc_xml, file = "df1.xml")
输出
# printing the document object shows the generated XML
doc_xml
# <?xml version="1.0"?>
# <table>
#   <row>
#     <a>1</a>
#     <b>a</b>
#   </row>
#   <row>
#     <a>2</a>
#     <b>b</b>
#   </row>
#   <row>
#     <a>3</a>
#     <b>c</b>
#   </row>
#   <row>
#     <a>4</a>
#     <b>d</b>
#   </row>
#   <row>
#     <a>5</a>
#     <b>e</b>
#   </row>
#   <row>
#     <a>6</a>
#     <b>f</b>
#   </row>
#   <row>
#     <a>7</a>
#     <b>g</b>
#   </row>
# </table>
验证节点
# list every <a> node found anywhere in the document
getNodeSet(doc = doc_xml, path = "//a")
# list every <b> node found anywhere in the document
getNodeSet(doc = doc_xml, path = "//b")
将 XML 转换回数据框
# rebuild the data frame from the child nodes of <table>, selected by XPath
xmlToDataFrame(
  nodes = getNodeSet(doc_xml, "//table/*"),
  stringsAsFactors = FALSE,
  colClasses = c("integer", "character")
)
# rebuild the data frame from the in-memory document object
xmlToDataFrame(
  doc = doc_xml,
  stringsAsFactors = FALSE,
  colClasses = c("integer", "character")
)
# rebuild the data frame from the saved XML file
xmlToDataFrame(
  doc = "df1.xml",
  stringsAsFactors = FALSE,
  colClasses = c("integer", "character")
)
#   a b
# 1 1 a
# 2 2 b
# 3 3 c
# 4 4 d
# 5 5 e
# 6 6 f
# 7 7 g