读取多个 CSV 文件并替换名称
Read multiple CSV files and replacing names
假设目录 C:/R/month_data 中有文件 2021-01.csv、2021-02.csv、……、2021-12.csv。我们想将它们全部读入各自独立的数据框中，但用月份名称替换原始文件名（例如 2021-01.csv 应该变成 “jan”，2021-02.csv 变成 “feb”，……，2021-12.csv 变成 “dec”）。
代码的目标是总共有12个dfs。
我写了下面的代码:
# Question code, kept exactly as asked.
# List the files in the data directory whose names match the pattern.
filenames = list.files(path = "C:/R/month_data",
pattern = "2021-+.*csv")
# Keep the first seven characters of each file name (e.g. "2021-04").
names = substr(filenames,1,7)
months = c("jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec")
# NOTE: the loops are nested, so for every file all twelve month names are
# (re)assigned. After the loops finish, each month variable holds the data
# read from the last entry of `names`; files are never paired with months.
for(i in names){
for (j in months){
filepath = file.path("C:/R/month_data",paste(i,".csv",sep=""))
assign(j, read.csv(filepath))
}
}
代码在技术上可以运行，但它没有把月份编号（例如 2021-04）与正确的月份名称（例如 “apr”）对应起来。
我该怎么办?
这是一个可能的方法
library(data.table)
# get files to read; "\\." matches a literal dot (a single backslash before
# "." is not a valid escape in an R string literal and fails to parse)
f <- list.files("c:/R/month_data/", pattern = "^2021-.*\\.csv$", full.names = TRUE)
# distill the month number from each file name and map it to the English
# three-letter abbreviation; "\\1" refers to the first capture group
f.month <- month.abb[as.numeric(gsub("2021-(.*)\\.csv$", "\\1", basename(f)))]
# if you want month names based on a locale, you can use
# lubridate::month(
#   as.numeric(gsub("2021-(.*)\\.csv$", "\\1", basename(f))),
#   label = TRUE, abbr = TRUE)
# read the csv files
L <- lapply(f, data.table::fread)
# pass names to the list
names(L) <- f.month
# pass list's contents to the global environment
list2env(L, envir = .GlobalEnv)
如果要为每个数据框命名，可以使用 lapply 将文件读入一个列表，然后重命名列表的元素，之后即可通过列表访问各个数据框。此外，您还可以使用内置常量 month.abb，它包含英文月份名称的三字母缩写：
# List the 2021 monthly CSV files. The anchored pattern accepts only names of
# the form 2021-<one or two digits>.csv.
filenames <- list.files(
  path = "C:/R/month_data/",
  pattern = "^2021-[0-9]{1,2}\\.csv$"
)
# file.path() yields character(0) for an empty listing, whereas paste0()
# would return the bare directory and make read.csv() fail.
filepath <- file.path("C:/R/month_data", filenames)
# Read every file into one list of data frames.
dfs <- lapply(filepath, read.csv)
# Name each element after the month number found in its own file name, so the
# names stay correct when some months are missing or the files do not sort in
# calendar order (assigning month.abb directly requires exactly 12 files).
month_index <- as.integer(sub("^2021-([0-9]{1,2})\\.csv$", "\\1", filenames))
names(dfs) <- month.abb[month_index]
View(dfs$Jan)
这是一个使用 lubridate::month() 和 tidyr::nest() 的解决方案。
library(tidyverse)
library(lubridate, warn.conflicts = FALSE)
# this is how I would read the file data, but because I don't have files, I will comment out
# filenames <- set_names(filenames, filenames)
# df <- map_dfr(filenames, read_csv, .id = "filename")
# and instead create simulated data at this point
df <- expand_grid(filename = c("2020-02.csv", "2021-02.csv", "2021-03.csv"), x = 1:5)
# parse the filename text to get a string for month name
df <- df %>%
  mutate(
    dttm = filename %>%
      # strip only the literal ".csv" extension at the end of the name
      # (an unescaped "." would match any character)
      str_remove("\\.csv$") %>%
      str_split("-", simplify = FALSE) %>%
      # rebuild the pieces as "month-1-year" so mdy() can parse them
      map_chr(~ str_c(.[2], "1", .[1], sep = "-")) %>%
      mdy(),
    # abbreviated month label, then the output file name built from it
    mnth = month(dttm, label = TRUE),
    new_filename = str_c(mnth, ".csv")
  )
print(df)
#> # A tibble: 15 x 5
#> filename x dttm mnth new_filename
#> <chr> <int> <date> <ord> <chr>
#> 1 2020-02.csv 1 2020-02-01 Feb Feb.csv
#> 2 2020-02.csv 2 2020-02-01 Feb Feb.csv
#> 3 2020-02.csv 3 2020-02-01 Feb Feb.csv
#> 4 2020-02.csv 4 2020-02-01 Feb Feb.csv
#> 5 2020-02.csv 5 2020-02-01 Feb Feb.csv
#> 6 2021-02.csv 1 2021-02-01 Feb Feb.csv
#> 7 2021-02.csv 2 2021-02-01 Feb Feb.csv
#> 8 2021-02.csv 3 2021-02-01 Feb Feb.csv
#> 9 2021-02.csv 4 2021-02-01 Feb Feb.csv
#> 10 2021-02.csv 5 2021-02-01 Feb Feb.csv
#> 11 2021-03.csv 1 2021-03-01 Mar Mar.csv
#> 12 2021-03.csv 2 2021-03-01 Mar Mar.csv
#> 13 2021-03.csv 3 2021-03-01 Mar Mar.csv
#> 14 2021-03.csv 4 2021-03-01 Mar Mar.csv
#> 15 2021-03.csv 5 2021-03-01 Mar Mar.csv
# Collapse to one row per output file: drop the helper columns, then nest
# everything except `new_filename` into a list-column called `data`.
df <- nest(
  select(df, -c(filename, dttm, mnth)),
  data = -new_filename
)
print(df)
#> # A tibble: 2 x 2
#> new_filename data
#> <chr> <list>
#> 1 Feb.csv <tibble [10 x 1]>
#> 2 Mar.csv <tibble [5 x 1]>
# write one CSV per row: each nested tibble goes to its matching file name
walk2(df$data, df$new_filename, write_csv)
由 reprex 包 (v2.0.1) 创建于 2022-03-21
假设目录 C:/R/month_data 中有文件 2021-01.csv、2021-02.csv、……、2021-12.csv。我们想将它们全部读入各自独立的数据框中，但用月份名称替换原始文件名（例如 2021-01.csv 应该变成 “jan”，2021-02.csv 变成 “feb”，……，2021-12.csv 变成 “dec”）。
代码的目标是总共有12个dfs。
我写了下面的代码:
# Question code, kept exactly as asked.
# List the files in the data directory whose names match the pattern.
filenames = list.files(path = "C:/R/month_data",
pattern = "2021-+.*csv")
# Keep the first seven characters of each file name (e.g. "2021-04").
names = substr(filenames,1,7)
months = c("jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec")
# NOTE: the loops are nested, so for every file all twelve month names are
# (re)assigned. After the loops finish, each month variable holds the data
# read from the last entry of `names`; files are never paired with months.
for(i in names){
for (j in months){
filepath = file.path("C:/R/month_data",paste(i,".csv",sep=""))
assign(j, read.csv(filepath))
}
}
代码在技术上可以运行，但它没有把月份编号（例如 2021-04）与正确的月份名称（例如 “apr”）对应起来。
我该怎么办?
这是一个可能的方法
library(data.table)
# get files to read; "\\." matches a literal dot (a single backslash before
# "." is not a valid escape in an R string literal and fails to parse)
f <- list.files("c:/R/month_data/", pattern = "^2021-.*\\.csv$", full.names = TRUE)
# distill the month number from each file name and map it to the English
# three-letter abbreviation; "\\1" refers to the first capture group
f.month <- month.abb[as.numeric(gsub("2021-(.*)\\.csv$", "\\1", basename(f)))]
# if you want month names based on a locale, you can use
# lubridate::month(
#   as.numeric(gsub("2021-(.*)\\.csv$", "\\1", basename(f))),
#   label = TRUE, abbr = TRUE)
# read the csv files
L <- lapply(f, data.table::fread)
# pass names to the list
names(L) <- f.month
# pass list's contents to the global environment
list2env(L, envir = .GlobalEnv)
如果要为每个数据框命名，可以使用 lapply 将文件读入一个列表，然后重命名列表的元素，之后即可通过列表访问各个数据框。此外，您还可以使用内置常量 month.abb，它包含英文月份名称的三字母缩写：
# List the 2021 monthly CSV files. The anchored pattern accepts only names of
# the form 2021-<one or two digits>.csv.
filenames <- list.files(
  path = "C:/R/month_data/",
  pattern = "^2021-[0-9]{1,2}\\.csv$"
)
# file.path() yields character(0) for an empty listing, whereas paste0()
# would return the bare directory and make read.csv() fail.
filepath <- file.path("C:/R/month_data", filenames)
# Read every file into one list of data frames.
dfs <- lapply(filepath, read.csv)
# Name each element after the month number found in its own file name, so the
# names stay correct when some months are missing or the files do not sort in
# calendar order (assigning month.abb directly requires exactly 12 files).
month_index <- as.integer(sub("^2021-([0-9]{1,2})\\.csv$", "\\1", filenames))
names(dfs) <- month.abb[month_index]
View(dfs$Jan)
这是一个使用 lubridate::month() 和 tidyr::nest() 的解决方案。
library(tidyverse)
library(lubridate, warn.conflicts = FALSE)
# this is how I would read the file data, but because I don't have files, I will comment out
# filenames <- set_names(filenames, filenames)
# df <- map_dfr(filenames, read_csv, .id = "filename")
# and instead create simulated data at this point
df <- expand_grid(filename = c("2020-02.csv", "2021-02.csv", "2021-03.csv"), x = 1:5)
# parse the filename text to get a string for month name
df <- df %>%
  mutate(
    dttm = filename %>%
      # strip only the literal ".csv" extension at the end of the name
      # (an unescaped "." would match any character)
      str_remove("\\.csv$") %>%
      str_split("-", simplify = FALSE) %>%
      # rebuild the pieces as "month-1-year" so mdy() can parse them
      map_chr(~ str_c(.[2], "1", .[1], sep = "-")) %>%
      mdy(),
    # abbreviated month label, then the output file name built from it
    mnth = month(dttm, label = TRUE),
    new_filename = str_c(mnth, ".csv")
  )
print(df)
#> # A tibble: 15 x 5
#> filename x dttm mnth new_filename
#> <chr> <int> <date> <ord> <chr>
#> 1 2020-02.csv 1 2020-02-01 Feb Feb.csv
#> 2 2020-02.csv 2 2020-02-01 Feb Feb.csv
#> 3 2020-02.csv 3 2020-02-01 Feb Feb.csv
#> 4 2020-02.csv 4 2020-02-01 Feb Feb.csv
#> 5 2020-02.csv 5 2020-02-01 Feb Feb.csv
#> 6 2021-02.csv 1 2021-02-01 Feb Feb.csv
#> 7 2021-02.csv 2 2021-02-01 Feb Feb.csv
#> 8 2021-02.csv 3 2021-02-01 Feb Feb.csv
#> 9 2021-02.csv 4 2021-02-01 Feb Feb.csv
#> 10 2021-02.csv 5 2021-02-01 Feb Feb.csv
#> 11 2021-03.csv 1 2021-03-01 Mar Mar.csv
#> 12 2021-03.csv 2 2021-03-01 Mar Mar.csv
#> 13 2021-03.csv 3 2021-03-01 Mar Mar.csv
#> 14 2021-03.csv 4 2021-03-01 Mar Mar.csv
#> 15 2021-03.csv 5 2021-03-01 Mar Mar.csv
# Collapse to one row per output file: drop the helper columns, then nest
# everything except `new_filename` into a list-column called `data`.
df <- nest(
  select(df, -c(filename, dttm, mnth)),
  data = -new_filename
)
print(df)
#> # A tibble: 2 x 2
#> new_filename data
#> <chr> <list>
#> 1 Feb.csv <tibble [10 x 1]>
#> 2 Mar.csv <tibble [5 x 1]>
# write one CSV per row: each nested tibble goes to its matching file name
walk2(df$data, df$new_filename, write_csv)
由 reprex 包 (v2.0.1) 创建于 2022-03-21