XML:从xml查询中提取所有时间序列
XML: Extracting all time series from the xml query
我正在寻找一种高效的方法,来提取某个 XML 查询返回的所有时间序列。我的代码如下:
library(xml2)

# Empty placeholder meant to receive the extracted time series.
iscb.rates <- data.frame()

# Base address of the service that is queried below.
url.iscb <- "http://www.sedlabanki.is/xmltimeseries/"

# Requested date window: a fixed start date up to the current day.
d.all <- as.Date("1990-01-01")
d.now <- Sys.Date()

# Assemble the request URL ("%3a" is a URL-encoded ":").
u <- paste0(
  url.iscb,
  "Default.aspx?DagsFra=",
  d.all,
  "T00%3a00%3a00&DagsTil=",
  d.now,
  "T23%3a59%3a59&GroupID=1&Type=xml"
)

# Fetch and parse the XML document, then convert it to a nested R list.
f <- xml2::read_xml(u)
doc <- xml2::as_list(f)
到目前为止,我还无法提取 f 中的所有时间序列;变量 doc 似乎只存储了一个时间序列。
试试这个:
library(xml2)
library(magrittr)

# Base address of the data provider
url.iscb <- "http://www.sedlabanki.is/xmltimeseries/"

# Dates defining the requested time range
d.all <- as.Date("1990-01-01")
d.now <- Sys.Date()

# Build the query URL ("%3a" is a URL-encoded ":")
u <- paste0(url.iscb, "Default.aspx?DagsFra=", d.all, "T00%3a00%3a00&DagsTil=",
            d.now, "T23%3a59%3a59&GroupID=1&Type=xml")

# Download and parse the XML document
f <- xml2::read_xml(u)

# One node per <TimeSeries> element, plus the ID attribute of each node
timeseries <- xml_find_all(f, ".//TimeSeries")
timeseriesID <- timeseries %>% xml_attr("ID")
#timeseries %>% xml_find_all(".//Name") %>% xml_text()

# Build one data frame per time series.
# seq_along() is used instead of 1:length(): when no <TimeSeries> node is
# found it yields an empty sequence, whereas 1:0 would iterate over c(1, 0).
dfs <- lapply(seq_along(timeseries), function(index) {
  currentNode <- timeseries[index]

  # All <Entry> nodes that belong to this series
  entries <- xml_find_all(currentNode, ".//Entry")

  # A series without entries contributes no rows. Returning NULL avoids the
  # data.frame() error raised when a length-1 ID meets zero-length columns.
  if (length(entries) == 0) {
    return(NULL)
  }

  # xml_find_first() gives one result per entry, so dates and values
  # stay aligned row by row
  dates <- xml_find_first(entries, ".//Date") %>% xml_text()
  values <- xml_find_first(entries, ".//Value") %>% xml_double()

  # The single series ID is recycled across all rows of this series
  data.frame(timeseriesID[index], dates, values)
})

# dfs is a list of data frames (NULL for series without entries);
# bind_rows() stacks them into one data frame and skips the NULLs
dplyr::bind_rows(dfs)
我正在寻找一种高效的方法,来提取某个 XML 查询返回的所有时间序列。我的代码如下:
library(xml2)

# Empty placeholder meant to receive the extracted time series.
iscb.rates <- data.frame()

# Base address of the service that is queried below.
url.iscb <- "http://www.sedlabanki.is/xmltimeseries/"

# Requested date window: a fixed start date up to the current day.
d.all <- as.Date("1990-01-01")
d.now <- Sys.Date()

# Assemble the request URL ("%3a" is a URL-encoded ":").
u <- paste0(
  url.iscb,
  "Default.aspx?DagsFra=",
  d.all,
  "T00%3a00%3a00&DagsTil=",
  d.now,
  "T23%3a59%3a59&GroupID=1&Type=xml"
)

# Fetch and parse the XML document, then convert it to a nested R list.
f <- xml2::read_xml(u)
doc <- xml2::as_list(f)
到目前为止,我还无法提取 f 中的所有时间序列;变量 doc 似乎只存储了一个时间序列。
试试这个:
library(xml2)
library(magrittr)

# Base address of the data provider
url.iscb <- "http://www.sedlabanki.is/xmltimeseries/"

# Dates defining the requested time range
d.all <- as.Date("1990-01-01")
d.now <- Sys.Date()

# Build the query URL ("%3a" is a URL-encoded ":")
u <- paste0(url.iscb, "Default.aspx?DagsFra=", d.all, "T00%3a00%3a00&DagsTil=",
            d.now, "T23%3a59%3a59&GroupID=1&Type=xml")

# Download and parse the XML document
f <- xml2::read_xml(u)

# One node per <TimeSeries> element, plus the ID attribute of each node
timeseries <- xml_find_all(f, ".//TimeSeries")
timeseriesID <- timeseries %>% xml_attr("ID")
#timeseries %>% xml_find_all(".//Name") %>% xml_text()

# Build one data frame per time series.
# seq_along() is used instead of 1:length(): when no <TimeSeries> node is
# found it yields an empty sequence, whereas 1:0 would iterate over c(1, 0).
dfs <- lapply(seq_along(timeseries), function(index) {
  currentNode <- timeseries[index]

  # All <Entry> nodes that belong to this series
  entries <- xml_find_all(currentNode, ".//Entry")

  # A series without entries contributes no rows. Returning NULL avoids the
  # data.frame() error raised when a length-1 ID meets zero-length columns.
  if (length(entries) == 0) {
    return(NULL)
  }

  # xml_find_first() gives one result per entry, so dates and values
  # stay aligned row by row
  dates <- xml_find_first(entries, ".//Date") %>% xml_text()
  values <- xml_find_first(entries, ".//Value") %>% xml_double()

  # The single series ID is recycled across all rows of this series
  data.frame(timeseriesID[index], dates, values)
})

# dfs is a list of data frames (NULL for series without entries);
# bind_rows() stacks them into one data frame and skips the NULLs
dplyr::bind_rows(dfs)