在不同目录中导入特定文件并附加它们
Importing specific file in different directories and appending them
我的相关文件分别存放在不同的压缩目录(zip 文件)中。目录 1 中的文件 1 对应目录 2 中的文件 1,目录 1 中的文件 2 对应目录 2 中的文件 2,依此类推。我可以使用 map_dfr,但它会把同一个目录内的所有文件导入并合并在一起,这不是我想要的结果。
下面是示例代码以及期望得到的结果:
library(tidyverse)
library(zip)
# Build a small reproducible fixture: two CSV files that are zipped into two
# archives with identical internal layouts, then extracted again.

# recursive = TRUE also creates the parent "data" directory; showWarnings =
# FALSE keeps re-runs quiet when the directory already exists.
dir.create("data/test", recursive = TRUE, showWarnings = FALSE)

df1 <- tribble(
  ~var1, ~var2, ~var3, ~var4,    ~var5,
  10,    20,    23,    "Male",   "No",
  6,     20,    30,    "Female", "Yes",
  7,     8,     30,    "Female", "No",
  10,    20,    30,    "Male",   "Yes"
)

df2 <- tribble(
  ~var1, ~var2, ~var3, ~var4,
  10,    20,    23,    "Male",
  6,     20,    30,    "Female",
  7,     8,     30,    "Female",
  10,    20,    30,    "Male"
)

write_csv(df1, file = "data/test/df1.csv")
write_csv(df2, file = "data/test/df2.csv")

# Both archives hold the same two files, in the same order.
csv_paths <- c("data/test/df1.csv", "data/test/df2.csv")
zip("data/zip1.zip", csv_paths)
zip("data/zip2.zip", csv_paths)

# `pattern` is a regular expression: escape the dot and anchor the end so
# only names that really end in ".zip" are picked up.
all_zips <- list.files(
  "data",
  pattern = "\\.zip$",
  recursive = TRUE,
  full.names = TRUE
)

# Name every archive path by itself so the extracted file lists can later be
# looked up by archive path, e.g. toimport[["data/zip1.zip"]].
all_files <- as.list(setNames(all_zips, all_zips))

# Each element of `toimport` is the vector of file paths that utils::unzip()
# reports for one archive.
# NOTE(review): with the default `exdir`, both archives extract to the same
# relative paths, so the later extraction overwrites the earlier one; the
# contents are identical in this example.
toimport <- lapply(all_files, utils::unzip)

# Undesirable: stacks all files found inside the first archive.
map_dfr(toimport[[1]], read_csv)

# Desirable: stack the i-th file of one archive with the i-th file of the
# other archive.
file_1 <- read_csv(toimport[["data/zip1.zip"]][[1]]) %>%
  bind_rows(read_csv(toimport[["data/zip2.zip"]][[1]]))

file_2 <- read_csv(toimport[["data/zip1.zip"]][[2]]) %>%
  bind_rows(read_csv(toimport[["data/zip2.zip"]][[2]]))
您可以转置和简化要读入的文件列表:
library(purrr)
library(readr)
# Regroup the extracted paths so that element i holds the i-th file of every
# archive, then read and row-bind each group into one tibble.
map(
  simplify_all(transpose(toimport)),
  function(paths) map_df(paths, read_csv, show_col_types = FALSE)
)
[[1]]
# A tibble: 8 x 5
var1 var2 var3 var4 var5
<dbl> <dbl> <dbl> <chr> <chr>
1 10 20 23 Male No
2 6 20 30 Female Yes
3 7 8 30 Female No
4 10 20 30 Male Yes
5 10 20 23 Male No
6 6 20 30 Female Yes
7 7 8 30 Female No
8 10 20 30 Male Yes
[[2]]
# A tibble: 8 x 4
var1 var2 var3 var4
<dbl> <dbl> <dbl> <chr>
1 10 20 23 Male
2 6 20 30 Female
3 7 8 30 Female
4 10 20 30 Male
5 10 20 23 Male
6 6 20 30 Female
7 7 8 30 Female
8 10 20 30 Male
或者使用 data.table::transpose():
# Same regrouping done with data.table::transpose(); each resulting group of
# paths is read and row-bound into one tibble.
map(
  data.table::transpose(toimport),
  function(paths) map_df(paths, read_csv, show_col_types = FALSE)
)
我的相关文件分别存放在不同的压缩目录(zip 文件)中。目录 1 中的文件 1 对应目录 2 中的文件 1,目录 1 中的文件 2 对应目录 2 中的文件 2,依此类推。我可以使用 map_dfr,但它会把同一个目录内的所有文件导入并合并在一起,这不是我想要的结果。下面是示例代码以及期望得到的结果:
library(tidyverse)
library(zip)
# Build a small reproducible fixture: two CSV files that are zipped into two
# archives with identical internal layouts, then extracted again.

# recursive = TRUE also creates the parent "data" directory; showWarnings =
# FALSE keeps re-runs quiet when the directory already exists.
dir.create("data/test", recursive = TRUE, showWarnings = FALSE)

df1 <- tribble(
  ~var1, ~var2, ~var3, ~var4,    ~var5,
  10,    20,    23,    "Male",   "No",
  6,     20,    30,    "Female", "Yes",
  7,     8,     30,    "Female", "No",
  10,    20,    30,    "Male",   "Yes"
)

df2 <- tribble(
  ~var1, ~var2, ~var3, ~var4,
  10,    20,    23,    "Male",
  6,     20,    30,    "Female",
  7,     8,     30,    "Female",
  10,    20,    30,    "Male"
)

write_csv(df1, file = "data/test/df1.csv")
write_csv(df2, file = "data/test/df2.csv")

# Both archives hold the same two files, in the same order.
csv_paths <- c("data/test/df1.csv", "data/test/df2.csv")
zip("data/zip1.zip", csv_paths)
zip("data/zip2.zip", csv_paths)

# `pattern` is a regular expression: escape the dot and anchor the end so
# only names that really end in ".zip" are picked up.
all_zips <- list.files(
  "data",
  pattern = "\\.zip$",
  recursive = TRUE,
  full.names = TRUE
)

# Name every archive path by itself so the extracted file lists can later be
# looked up by archive path, e.g. toimport[["data/zip1.zip"]].
all_files <- as.list(setNames(all_zips, all_zips))

# Each element of `toimport` is the vector of file paths that utils::unzip()
# reports for one archive.
# NOTE(review): with the default `exdir`, both archives extract to the same
# relative paths, so the later extraction overwrites the earlier one; the
# contents are identical in this example.
toimport <- lapply(all_files, utils::unzip)

# Undesirable: stacks all files found inside the first archive.
map_dfr(toimport[[1]], read_csv)

# Desirable: stack the i-th file of one archive with the i-th file of the
# other archive.
file_1 <- read_csv(toimport[["data/zip1.zip"]][[1]]) %>%
  bind_rows(read_csv(toimport[["data/zip2.zip"]][[1]]))

file_2 <- read_csv(toimport[["data/zip1.zip"]][[2]]) %>%
  bind_rows(read_csv(toimport[["data/zip2.zip"]][[2]]))
您可以转置和简化要读入的文件列表:
library(purrr)
library(readr)
# Regroup the extracted paths so that element i holds the i-th file of every
# archive, then read and row-bind each group into one tibble.
map(
  simplify_all(transpose(toimport)),
  function(paths) map_df(paths, read_csv, show_col_types = FALSE)
)
[[1]]
# A tibble: 8 x 5
var1 var2 var3 var4 var5
<dbl> <dbl> <dbl> <chr> <chr>
1 10 20 23 Male No
2 6 20 30 Female Yes
3 7 8 30 Female No
4 10 20 30 Male Yes
5 10 20 23 Male No
6 6 20 30 Female Yes
7 7 8 30 Female No
8 10 20 30 Male Yes
[[2]]
# A tibble: 8 x 4
var1 var2 var3 var4
<dbl> <dbl> <dbl> <chr>
1 10 20 23 Male
2 6 20 30 Female
3 7 8 30 Female
4 10 20 30 Male
5 10 20 23 Male
6 6 20 30 Female
7 7 8 30 Female
8 10 20 30 Male
或者使用 data.table::transpose():
# Same regrouping done with data.table::transpose(); each resulting group of
# paths is read and row-bound into one tibble.
map(
  data.table::transpose(toimport),
  function(paths) map_df(paths, read_csv, show_col_types = FALSE)
)