从不同目录中导入相互对应的文件并将它们合并

Importing specific file in different directories and appending them

我在不同的压缩(zip)目录中有相互对应的文件:目录 1 中的文件 1 对应目录 2 中的文件 1,目录 1 中的文件 2 对应目录 2 中的文件 2,依此类推。我可以使用 map_dfr,但它会把同一个目录中的所有文件全部导入并合并在一起,这不是我想要的。下面是示例代码以及期望的结果:

library(tidyverse)
library(zip)

# Build a small reproducible fixture: two CSV files under data/test.
# recursive = TRUE also creates the parent "data" directory when it is
# missing (without it dir.create() fails and the writes below error), and
# showWarnings = FALSE keeps re-runs quiet when the directory already exists.
dir.create("data/test", recursive = TRUE, showWarnings = FALSE)

# Example table with five columns (three numeric, two character).
df1 <- tribble(
  ~var1, ~var2, ~var3, ~var4, ~var5,
  10, 20, 23, "Male", "No",
  6, 20, 30, "Female", "Yes",
  7, 8, 30, "Female", "No",
  10, 20, 30, "Male", "Yes"
)

# Same rows as df1 but without the var5 column.
df2 <- tribble(
  ~var1, ~var2, ~var3, ~var4,
  10, 20, 23, "Male",
  6, 20, 30, "Female",
  7, 8, 30, "Female",
  10, 20, 30, "Male"
)

# Persist both tables so they can be archived afterwards.
write_csv(df1, file = "data/test/df1.csv")
write_csv(df2, file = "data/test/df2.csv")



# Create two archives with identical contents, so that each one holds a
# df1.csv and a df2.csv (stand-ins for matching files in separate zip
# directories).
walk(
  c("data/zip1.zip", "data/zip2.zip"),
  zip,
  files = c("data/test/df1.csv", "data/test/df2.csv")
)


# Locate every archive below "data". `pattern` is a regular expression, so the
# dot is escaped and the match is anchored to the end of the file name; the
# unescaped ".zip" would also match names such as "unzipped.csv".
all_zips <- list.files(
  "data",
  pattern = "\\.zip$",
  recursive = TRUE,
  full.names = TRUE
)

# Named list of archive paths, keyed by the path itself, so the extracted
# files can later be looked up by archive (e.g. toimport[["data/zip1.zip"]]).
all_files <- as.list(setNames(all_zips, all_zips))

# Extract each archive into its own fresh temporary directory. Unzipping all
# archives into one shared directory would let same-named files from different
# archives overwrite each other, so every archive would appear to contain the
# contents of the last one extracted. utils::unzip() creates `exdir` when
# needed and returns the paths of the extracted files.
toimport <- lapply(all_files, function(zip_path) {
  utils::unzip(zip_path, exdir = tempfile("unzipped_"))
})


# Undesirable: stacks every file of the first archive into a single table.
map_dfr(toimport[[1]], read_csv)

# Desirable: stack the files that sit at the same position in each archive.

# First file of zip1 followed by the first file of zip2.
file_1 <- bind_rows(
  read_csv(toimport[["data/zip1.zip"]][[1]]),
  read_csv(toimport[["data/zip2.zip"]][[1]])
)

# Second file of zip1 followed by the second file of zip2.
file_2 <- bind_rows(
  read_csv(toimport[["data/zip1.zip"]][[2]]),
  read_csv(toimport[["data/zip2.zip"]][[2]])
)

您可以转置和简化要读入的文件列表:

library(purrr)
library(readr)

# Regroup the extracted paths by position (the first file of every archive,
# then the second, ...) and row-bind each group into one tibble.
# list_transpose() and list_rbind() (purrr >= 1.0.0) replace the deprecated
# transpose()/simplify_all() pair and the superseded map_df().
# NOTE(review): pairing is purely positional, so this assumes every archive
# lists its files in the same order - confirm for real data.
map(
  list_transpose(toimport),
  function(paths) list_rbind(map(paths, read_csv, show_col_types = FALSE))
)

[[1]]                                                                                                                                                                        
# A tibble: 8 x 5
   var1  var2  var3 var4   var5 
  <dbl> <dbl> <dbl> <chr>  <chr>
1    10    20    23 Male   No   
2     6    20    30 Female Yes  
3     7     8    30 Female No   
4    10    20    30 Male   Yes  
5    10    20    23 Male   No   
6     6    20    30 Female Yes  
7     7     8    30 Female No   
8    10    20    30 Male   Yes  

[[2]]
# A tibble: 8 x 4
   var1  var2  var3 var4  
  <dbl> <dbl> <dbl> <chr> 
1    10    20    23 Male  
2     6    20    30 Female
3     7     8    30 Female
4    10    20    30 Male  
5    10    20    23 Male  
6     6    20    30 Female
7     7     8    30 Female
8    10    20    30 Male  

或使用data.table::transpose():

# Same idea using data.table::transpose() to regroup the paths by position;
# each group of paths is then read and row-bound into a single tibble.
map(
  data.table::transpose(toimport),
  function(paths) map_df(paths, read_csv, show_col_types = FALSE)
)