在 R 中将具有相同前缀的 excel 工作表保存到新的 excel 文件
excel sheets with same prefix to a new excel file in R
我正在尝试根据 sheet 名称中的前缀，将一个包含多个 sheet 的 excel 文件拆分为多个新文件。
我想将所有前缀为 a_ 的 sheet 输出到 a.xlsx，前缀为 b_ 的 sheet 输出到 b.xlsx，依此类推。
目前的代码如下:
library(readxl)
library(readr)
library(writexl)
# Workbook to split; sheets are expected to be named "<prefix>_<rest>".
xlsx_path <- "/path/to/excel/file"

# get sheet names
sheets <- excel_sheets(xlsx_path)

# sheet prefix: everything before the first underscore ("a_1" -> "a").
# Computed on the sheet names themselves, one prefix per sheet, so it can be
# used as the grouping key below.
file_prefix <- sub("_.*", "", sheets)

# read_excel - read every sheet once; each list element is named after its
# sheet so the name survives into the output workbook.
dats <- lapply(setNames(nm = sheets), read_excel, path = xlsx_path)

# group the sheets that share a prefix: one sub-list per output workbook
datspref <- split(dats, file_prefix)

# Output file names are derived from the group names so that they always line
# up with the groups (split() orders groups by sorted prefix). The extension
# is ".xlsx".
prefnames <- paste0(names(datspref), ".xlsx")

# save sheets with same prefix to a new excel file; write_xlsx() is given a
# named list of data frames per file, one element per sheet
invisible(Map(write_xlsx, datspref, prefnames))
感谢任何帮助!
设置:我使用以下 sheet 制作了 sample.xlsx
(每个 sheet 上的相同 2x2 table):
# Source workbook; its sheets are named "<prefix>_<suffix>".
xlsxfile <- "sample.xlsx"
sheetnames <- readxl::excel_sheets(path = xlsxfile)
sheetnames
# [1] "a_1" "a_2" "a_3" "b_1" "b_2" "c_1" "c_3"
# Target file stem for each sheet: drop the first underscore and everything
# after it.
newxlsxfiles <- sub(pattern = "_.*", replacement = "", x = sheetnames)
newxlsxfiles
# [1] "a" "a" "a" "b" "b" "c" "c"
# Read every sheet into a list whose elements are named after the sheets.
dats <- lapply(sheetnames, function(sheet) {
  readxl::read_xlsx(path = xlsxfile, sheet = sheet)
})
names(dats) <- sheetnames
str(dats)
# List of 7
# $ a_1: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# ..$ a: num 1
# ..$ b: num 2
# $ a_2: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# ..$ a: num 1
# ..$ b: num 2
# $ a_3: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# ..$ a: num 1
# ..$ b: num 2
# $ b_1: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# ..$ a: num 1
# ..$ b: num 2
# $ b_2: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# ..$ a: num 1
# ..$ b: num 2
# $ c_1: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# ..$ a: num 1
# ..$ b: num 2
# $ c_3: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# ..$ a: num 1
# ..$ b: num 2
# Bucket the sheets by target file stem: one sub-list per output workbook.
datspl <- split(x = dats, f = newxlsxfiles)
str(datspl)
# List of 3
# $ a:List of 3
# ..$ a_1: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# .. ..$ a: num 1
# .. ..$ b: num 2
# ..$ a_2: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# .. ..$ a: num 1
# .. ..$ b: num 2
# ..$ a_3: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# .. ..$ a: num 1
# .. ..$ b: num 2
# $ b:List of 2
# ..$ b_1: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# .. ..$ a: num 1
# .. ..$ b: num 2
# ..$ b_2: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# .. ..$ a: num 1
# .. ..$ b: num 2
# $ c:List of 2
# ..$ c_1: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# .. ..$ a: num 1
# .. ..$ b: num 2
# ..$ c_3: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# .. ..$ a: num 1
# .. ..$ b: num 2
# One output workbook per prefix; each workbook receives the named list of
# sheets that share that prefix.
outfiles <- paste0(names(datspl), ".xlsx")
invisible(Map(writexl::write_xlsx, datspl, outfiles))
# Check exactly the files written above. A directory listing filtered with
# "xlsx$" would also pick up the input workbook "sample.xlsx".
outfiles[file.exists(outfiles)]
# [1] "a.xlsx" "b.xlsx" "c.xlsx"
readxl::excel_sheets("a.xlsx")
# [1] "a_1" "a_2" "a_3"
仅供参考：如果您需要从 sheet 名称中删除前导的 a_ 前缀，只需在拆分之前更改 dats 的名称，例如：
# ...
# Strip the leading "<prefix>_" so that sheets are named by their suffix only,
# then regroup by target file stem.
names(dats) <- sub(pattern = "^[^_]*_", replacement = "", x = sheetnames)
datspl <- split(x = dats, f = newxlsxfiles)
# ...
我正在尝试根据 sheet 名称中的前缀，将一个包含多个 sheet 的 excel 文件拆分为多个新文件。
我想将所有前缀为 a_ 的 sheet 输出到 a.xlsx，前缀为 b_ 的 sheet 输出到 b.xlsx，依此类推。
目前的代码如下:
library(readxl)
library(readr)
library(writexl)
# Workbook to split; sheets are expected to be named "<prefix>_<rest>".
xlsx_path <- "/path/to/excel/file"

# get sheet names
sheets <- excel_sheets(xlsx_path)

# sheet prefix: everything before the first underscore ("a_1" -> "a").
# Computed on the sheet names themselves, one prefix per sheet, so it can be
# used as the grouping key below.
file_prefix <- sub("_.*", "", sheets)

# read_excel - read every sheet once; each list element is named after its
# sheet so the name survives into the output workbook.
dats <- lapply(setNames(nm = sheets), read_excel, path = xlsx_path)

# group the sheets that share a prefix: one sub-list per output workbook
datspref <- split(dats, file_prefix)

# Output file names are derived from the group names so that they always line
# up with the groups (split() orders groups by sorted prefix). The extension
# is ".xlsx".
prefnames <- paste0(names(datspref), ".xlsx")

# save sheets with same prefix to a new excel file; write_xlsx() is given a
# named list of data frames per file, one element per sheet
invisible(Map(write_xlsx, datspref, prefnames))
感谢任何帮助!
设置:我使用以下 sheet 制作了 sample.xlsx
(每个 sheet 上的相同 2x2 table):
# Source workbook; its sheets are named "<prefix>_<suffix>".
xlsxfile <- "sample.xlsx"
sheetnames <- readxl::excel_sheets(path = xlsxfile)
sheetnames
# [1] "a_1" "a_2" "a_3" "b_1" "b_2" "c_1" "c_3"
# Target file stem for each sheet: drop the first underscore and everything
# after it.
newxlsxfiles <- sub(pattern = "_.*", replacement = "", x = sheetnames)
newxlsxfiles
# [1] "a" "a" "a" "b" "b" "c" "c"
# Read every sheet into a list whose elements are named after the sheets.
dats <- lapply(sheetnames, function(sheet) {
  readxl::read_xlsx(path = xlsxfile, sheet = sheet)
})
names(dats) <- sheetnames
str(dats)
# List of 7
# $ a_1: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# ..$ a: num 1
# ..$ b: num 2
# $ a_2: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# ..$ a: num 1
# ..$ b: num 2
# $ a_3: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# ..$ a: num 1
# ..$ b: num 2
# $ b_1: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# ..$ a: num 1
# ..$ b: num 2
# $ b_2: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# ..$ a: num 1
# ..$ b: num 2
# $ c_1: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# ..$ a: num 1
# ..$ b: num 2
# $ c_3: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# ..$ a: num 1
# ..$ b: num 2
# Bucket the sheets by target file stem: one sub-list per output workbook.
datspl <- split(x = dats, f = newxlsxfiles)
str(datspl)
# List of 3
# $ a:List of 3
# ..$ a_1: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# .. ..$ a: num 1
# .. ..$ b: num 2
# ..$ a_2: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# .. ..$ a: num 1
# .. ..$ b: num 2
# ..$ a_3: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# .. ..$ a: num 1
# .. ..$ b: num 2
# $ b:List of 2
# ..$ b_1: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# .. ..$ a: num 1
# .. ..$ b: num 2
# ..$ b_2: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# .. ..$ a: num 1
# .. ..$ b: num 2
# $ c:List of 2
# ..$ c_1: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# .. ..$ a: num 1
# .. ..$ b: num 2
# ..$ c_3: tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
# .. ..$ a: num 1
# .. ..$ b: num 2
# One output workbook per prefix; each workbook receives the named list of
# sheets that share that prefix.
outfiles <- paste0(names(datspl), ".xlsx")
invisible(Map(writexl::write_xlsx, datspl, outfiles))
# Check exactly the files written above. A directory listing filtered with
# "xlsx$" would also pick up the input workbook "sample.xlsx".
outfiles[file.exists(outfiles)]
# [1] "a.xlsx" "b.xlsx" "c.xlsx"
readxl::excel_sheets("a.xlsx")
# [1] "a_1" "a_2" "a_3"
仅供参考：如果您需要从 sheet 名称中删除前导的 a_ 前缀，只需在拆分之前更改 dats 的名称，例如：
# ...
# Strip the leading "<prefix>_" so that sheets are named by their suffix only,
# then regroup by target file stem.
names(dats) <- sub(pattern = "^[^_]*_", replacement = "", x = sheetnames)
datspl <- split(x = dats, f = newxlsxfiles)
# ...