从通过 URL 访问的压缩文件中下载文本文件时,文件被下载到了工作目录,而不是 R 的全局环境
Text files from zipped archives accessed via URLs are downloaded to the working directory instead of the global environment in R
我正在尝试从多个通过 URL 访问的压缩文件中,检索与特定模式匹配的多个 txt 数据文件。我写了一个脚本:从 URL 下载所需的数据文件,把它们作为数据框保存在列表中,然后将所有数据框绑定在一起。之后我把该函数应用于 URL 列表。
我想要的最终结果是:在 R 的全局环境中,把所有 URL 的全部下载数据合并到一个数据框里。
但是目前,各个文件被下载到了工作目录(这不是我想要的),并且没有合并成单个数据框。我想知道这个问题是否源于 download.file,但我一直没能找到解决方案或类似问题的帖子。
# Archives to fetch: one row per zip file, pairing a short label with its URL.
# Each label matches the rcp token that appears in the archive's file name.
url_df <- data.frame(
  model = c("rcp26", "rcp45", "rcp85"),
  url = c(
    "https://b2share.eudat.eu/api/files/d4850267-3ce2-44f4-b5e3-8391a4f3dc27/LTER_site_data_from_EURO-CORDEX-RCMs_rel1.see_disclaimer.77c127c4-2ebe-453b-b5af-61858ff02e31.huss_historical_rcp26_day_txt.zip",
    "https://b2share.eudat.eu/api/files/d4850267-3ce2-44f4-b5e3-8391a4f3dc27/LTER_site_data_from_EURO-CORDEX-RCMs_rel1.see_disclaimer.77c127c4-2ebe-453b-b5af-61858ff02e31.huss_historical_rcp45_day_txt.zip",
    "https://b2share.eudat.eu/api/files/d4850267-3ce2-44f4-b5e3-8391a4f3dc27/LTER_site_data_from_EURO-CORDEX-RCMs_rel1.see_disclaimer.77c127c4-2ebe-453b-b5af-61858ff02e31.huss_historical_rcp85_day_txt.zip"
  )
)
# Empty data frame intended to receive the downloaded data
downloaded_data <- data.frame()
# Download the zip archive at a single URL, read the text files whose names
# match one of three patterns, and return them stacked into one data frame.
#
# Args:
#   url: address of a zip archive; passed straight to download.file().
# Returns:
#   rbind() of `downloaded_data` (looked up outside the function) and the
#   data frame built from the matching files of this archive.
get_data = function(url) {
temp <- tempfile() # path in the session temp directory that will hold the archive
download.file(url,temp) # save the archive found at `url` to that path
# list the archive contents (list=TRUE does not extract anything) and keep the
# entry names matching one of the three patterns, ignoring case
file.list <- grep("KNMI-RACMO22E.*txt|MPI-CSC-REMO.*txt|SMHI-RCA4.*txt", unzip(temp, list=TRUE)$Name, ignore.case=TRUE, value=TRUE)
# NOTE(review): unzip() is called without `exdir`, whose documented default is
# ".", so the selected files are extracted into the current working directory
# before read.table() reads each of them
data.list = lapply(unzip(temp, files=file.list), read.table, header=FALSE, comment.char = "", check.names = FALSE)
# stack the data frames in the list into one single data frame
bound_data = dplyr::bind_rows(data.list)
# NOTE(review): this assignment creates a variable local to the call; the
# `downloaded_data` object defined outside the function is read, not modified
downloaded_data = rbind(downloaded_data, bound_data )
return(downloaded_data)
# NOTE(review): this line follows return(), so it is never executed and the
# temporary archive is not removed
unlink(temp)
}
# Call get_data() once for every URL in url_df$url.
# NOTE(review): the value returned by sapply() is not assigned to a name here,
# so this statement does not store the combined data in any variable.
sapply(url_df$url, get_data)
如能提供任何帮助,我将不胜感激!
您不能在函数内部引用 downloaded_data——函数内的赋值只会修改局部副本,不会改变全局环境中的对象。该函数会分别应用于每个 URL,然后您可以把各次调用的返回结果绑定在一起,以创建 downloaded_data。
此外,我还对数据的解压和读入做了一些更改,以确保文件确实被读入。
# Archives to fetch: one row per zip file, pairing a short label with its URL.
# Each label matches the rcp token that appears in the archive's file name.
url_df <- data.frame(
  model = c("rcp26", "rcp45", "rcp85"),
  url = c(
    "https://b2share.eudat.eu/api/files/d4850267-3ce2-44f4-b5e3-8391a4f3dc27/LTER_site_data_from_EURO-CORDEX-RCMs_rel1.see_disclaimer.77c127c4-2ebe-453b-b5af-61858ff02e31.huss_historical_rcp26_day_txt.zip",
    "https://b2share.eudat.eu/api/files/d4850267-3ce2-44f4-b5e3-8391a4f3dc27/LTER_site_data_from_EURO-CORDEX-RCMs_rel1.see_disclaimer.77c127c4-2ebe-453b-b5af-61858ff02e31.huss_historical_rcp45_day_txt.zip",
    "https://b2share.eudat.eu/api/files/d4850267-3ce2-44f4-b5e3-8391a4f3dc27/LTER_site_data_from_EURO-CORDEX-RCMs_rel1.see_disclaimer.77c127c4-2ebe-453b-b5af-61858ff02e31.huss_historical_rcp85_day_txt.zip"
  )
)
# Download the zip archive at a single URL and return the contents of the
# matching text files as one data frame.
#
# Args:
#   url: address of a zip archive; passed straight to download.file().
# Returns:
#   A data frame made by stacking (dplyr::bind_rows) every extracted file whose
#   path matches one of the three model-name patterns. Files are read with
#   header = FALSE, so columns get read.table()'s default names.
# Side effects:
#   None that outlive the call: the archive and everything extracted from it
#   live in a private directory that is deleted when the function exits,
#   whether it returns normally or stops with an error.
get_data <- function(url) {
  # Use a directory of our own rather than the shared session tempdir(), so
  # cleanup can remove everything without touching other temporary files.
  work_dir <- tempfile("get_data_")
  dir.create(work_dir)
  # Registered before any work is done; code placed after return() never runs.
  on.exit(unlink(work_dir, recursive = TRUE), add = TRUE)

  zip_path <- file.path(work_dir, "downloaded.zip")
  # Binary mode keeps the archive intact on platforms that distinguish text
  # and binary transfers.
  download.file(url, zip_path, mode = "wb")

  # unzip() returns the paths of the files it extracted into work_dir
  extracted <- unzip(zip_path, exdir = work_dir)
  keep_files <- grep(
    "KNMI-RACMO22E.*txt|MPI-CSC-REMO.*txt|SMHI-RCA4.*txt",
    extracted,
    value = TRUE
  )

  data_list <- lapply(
    keep_files,
    read.table,
    header = FALSE, comment.char = "", check.names = FALSE
  )
  # Bind the data frames in the list into one single data frame
  dplyr::bind_rows(data_list)
}
# Run get_data() once per URL, then stack the per-URL data frames into one
downloaded_data <- dplyr::bind_rows(
  lapply(X = url_df$url, FUN = get_data)
)
# Show the number of rows and columns of the combined data frame
dim(downloaded_data)
#> [1] 912962 7
我正在尝试从多个通过 URL 访问的压缩文件中,检索与特定模式匹配的多个 txt 数据文件。我写了一个脚本:从 URL 下载所需的数据文件,把它们作为数据框保存在列表中,然后将所有数据框绑定在一起。之后我把该函数应用于 URL 列表。
我想要的最终结果是:在 R 的全局环境中,把所有 URL 的全部下载数据合并到一个数据框里。
但是目前,各个文件被下载到了工作目录(这不是我想要的),并且没有合并成单个数据框。我想知道这个问题是否源于 download.file,但我一直没能找到解决方案或类似问题的帖子。
# Archives to fetch: one row per zip file, pairing a short label with its URL.
# Each label matches the rcp token that appears in the archive's file name.
url_df <- data.frame(
  model = c("rcp26", "rcp45", "rcp85"),
  url = c(
    "https://b2share.eudat.eu/api/files/d4850267-3ce2-44f4-b5e3-8391a4f3dc27/LTER_site_data_from_EURO-CORDEX-RCMs_rel1.see_disclaimer.77c127c4-2ebe-453b-b5af-61858ff02e31.huss_historical_rcp26_day_txt.zip",
    "https://b2share.eudat.eu/api/files/d4850267-3ce2-44f4-b5e3-8391a4f3dc27/LTER_site_data_from_EURO-CORDEX-RCMs_rel1.see_disclaimer.77c127c4-2ebe-453b-b5af-61858ff02e31.huss_historical_rcp45_day_txt.zip",
    "https://b2share.eudat.eu/api/files/d4850267-3ce2-44f4-b5e3-8391a4f3dc27/LTER_site_data_from_EURO-CORDEX-RCMs_rel1.see_disclaimer.77c127c4-2ebe-453b-b5af-61858ff02e31.huss_historical_rcp85_day_txt.zip"
  )
)
# Empty data frame intended to receive the downloaded data
downloaded_data <- data.frame()
# Download the zip archive at a single URL, read the text files whose names
# match one of three patterns, and return them stacked into one data frame.
#
# Args:
#   url: address of a zip archive; passed straight to download.file().
# Returns:
#   rbind() of `downloaded_data` (looked up outside the function) and the
#   data frame built from the matching files of this archive.
get_data = function(url) {
temp <- tempfile() # path in the session temp directory that will hold the archive
download.file(url,temp) # save the archive found at `url` to that path
# list the archive contents (list=TRUE does not extract anything) and keep the
# entry names matching one of the three patterns, ignoring case
file.list <- grep("KNMI-RACMO22E.*txt|MPI-CSC-REMO.*txt|SMHI-RCA4.*txt", unzip(temp, list=TRUE)$Name, ignore.case=TRUE, value=TRUE)
# NOTE(review): unzip() is called without `exdir`, whose documented default is
# ".", so the selected files are extracted into the current working directory
# before read.table() reads each of them
data.list = lapply(unzip(temp, files=file.list), read.table, header=FALSE, comment.char = "", check.names = FALSE)
# stack the data frames in the list into one single data frame
bound_data = dplyr::bind_rows(data.list)
# NOTE(review): this assignment creates a variable local to the call; the
# `downloaded_data` object defined outside the function is read, not modified
downloaded_data = rbind(downloaded_data, bound_data )
return(downloaded_data)
# NOTE(review): this line follows return(), so it is never executed and the
# temporary archive is not removed
unlink(temp)
}
# Call get_data() once for every URL in url_df$url.
# NOTE(review): the value returned by sapply() is not assigned to a name here,
# so this statement does not store the combined data in any variable.
sapply(url_df$url, get_data)
如能提供任何帮助,我将不胜感激!
您不能在函数内部引用 downloaded_data——函数内的赋值只会修改局部副本,不会改变全局环境中的对象。该函数会分别应用于每个 URL,然后您可以把各次调用的返回结果绑定在一起,以创建 downloaded_data。
此外,我还对数据的解压和读入做了一些更改,以确保文件确实被读入。
# Archives to fetch: one row per zip file, pairing a short label with its URL.
# Each label matches the rcp token that appears in the archive's file name.
url_df <- data.frame(
  model = c("rcp26", "rcp45", "rcp85"),
  url = c(
    "https://b2share.eudat.eu/api/files/d4850267-3ce2-44f4-b5e3-8391a4f3dc27/LTER_site_data_from_EURO-CORDEX-RCMs_rel1.see_disclaimer.77c127c4-2ebe-453b-b5af-61858ff02e31.huss_historical_rcp26_day_txt.zip",
    "https://b2share.eudat.eu/api/files/d4850267-3ce2-44f4-b5e3-8391a4f3dc27/LTER_site_data_from_EURO-CORDEX-RCMs_rel1.see_disclaimer.77c127c4-2ebe-453b-b5af-61858ff02e31.huss_historical_rcp45_day_txt.zip",
    "https://b2share.eudat.eu/api/files/d4850267-3ce2-44f4-b5e3-8391a4f3dc27/LTER_site_data_from_EURO-CORDEX-RCMs_rel1.see_disclaimer.77c127c4-2ebe-453b-b5af-61858ff02e31.huss_historical_rcp85_day_txt.zip"
  )
)
# Download the zip archive at a single URL and return the contents of the
# matching text files as one data frame.
#
# Args:
#   url: address of a zip archive; passed straight to download.file().
# Returns:
#   A data frame made by stacking (dplyr::bind_rows) every extracted file whose
#   path matches one of the three model-name patterns. Files are read with
#   header = FALSE, so columns get read.table()'s default names.
# Side effects:
#   None that outlive the call: the archive and everything extracted from it
#   live in a private directory that is deleted when the function exits,
#   whether it returns normally or stops with an error.
get_data <- function(url) {
  # Use a directory of our own rather than the shared session tempdir(), so
  # cleanup can remove everything without touching other temporary files.
  work_dir <- tempfile("get_data_")
  dir.create(work_dir)
  # Registered before any work is done; code placed after return() never runs.
  on.exit(unlink(work_dir, recursive = TRUE), add = TRUE)

  zip_path <- file.path(work_dir, "downloaded.zip")
  # Binary mode keeps the archive intact on platforms that distinguish text
  # and binary transfers.
  download.file(url, zip_path, mode = "wb")

  # unzip() returns the paths of the files it extracted into work_dir
  extracted <- unzip(zip_path, exdir = work_dir)
  keep_files <- grep(
    "KNMI-RACMO22E.*txt|MPI-CSC-REMO.*txt|SMHI-RCA4.*txt",
    extracted,
    value = TRUE
  )

  data_list <- lapply(
    keep_files,
    read.table,
    header = FALSE, comment.char = "", check.names = FALSE
  )
  # Bind the data frames in the list into one single data frame
  dplyr::bind_rows(data_list)
}
# Run get_data() once per URL, then stack the per-URL data frames into one
downloaded_data <- dplyr::bind_rows(
  lapply(X = url_df$url, FUN = get_data)
)
# Show the number of rows and columns of the combined data frame
dim(downloaded_data)
#> [1] 912962 7