在 R 中根据 if 语句的条件写文件时,每写满 n 个文件就保存到一个新的子文件夹中

Save every nth file to a new subfolder following an if statement in R

首先,如果这个问题已经在其他地方得到解决但我找不到答案,我深表歉意。

我在 R 中有一个 for 循环,它根据特定条件用 writeLines 将文本保存到文件中。我想把每 10 个已写入的文件保存到一个新的子文件夹中,这样每个子文件夹最多只有 10 个文件。

现在,如果所有文件都会被写入,我可以编写一个脚本来实现这一点(见下文),但我不确定当只有部分文件被写入时,如何在每次文件夹写满 10 个文件后切换到新的子文件夹。

下面的例子应该清楚。

# Simulate 100 normally distributed values (fixed seed for reproducibility)
set.seed(15)
input <- rnorm(100, mean = 10, sd = 3)
# Only values strictly above this threshold are written to disk
thrshld <- 10
out_dir <- "~/R/tmp/Batch_Saving"

# k is the number given to the next subfolder
k <- 1
for (i in seq_along(input)) {

  # Switch to a new subfolder at i = 1, 11, 21, ..., i.e. once per block of
  # 10 loop indices, regardless of how many files were actually written
  if (i %% 10 == 1) {
    sub_dir <- paste0(out_dir, "/Dir_Num_", k)
    k <- k + 1
  }

  # Values at or below the threshold produce no file
  if (input[i] <= thrshld) {
    next
  }

  txt <- paste("The number", signif(input[i], 4), "is greater than", thrshld)

  # Make sure the target subfolder (and its parents) exist before writing
  if (!dir.exists(sub_dir)) {
    dir.create(sub_dir, recursive = TRUE)
  }

  # One file per written value, named after its index in `input`
  writeLines(text = txt,
             con = paste0(sub_dir, "/file_", i, ".txt"))

}

此脚本创建 10 个文件夹,每个文件夹少于 10 个文件,但文件数量不同。人们可以像这样在终端中快速检查:

Batch_Saving > find . -maxdepth 1 -mindepth 1 -type d -exec sh -c 'echo "{} : $(find "{}" -type f | wc -l)" file\(s\)' \;

./Dir_Num_4 :        6 file(s)
./Dir_Num_3 :        7 file(s)
./Dir_Num_2 :        4 file(s)
./Dir_Num_5 :        7 file(s)
./Dir_Num_10 :        4 file(s)
./Dir_Num_9 :        5 file(s)
./Dir_Num_7 :        3 file(s)
./Dir_Num_6 :        6 file(s)
./Dir_Num_1 :        6 file(s)
./Dir_Num_8 :        4 file(s)

我认为实现此目的的唯一方法是每次读取已写入的文件数或以某种方式跟踪已写入的文件数并相应地更改参数 k

我相信这也可以通过其他方法实现(即 apply),但为了学习,我想在循环中编写代码。

非常感谢!

试试这个:

# generate random num
set.seed(15)
input <- rnorm(100, mean = 10, sd = 3)

# Write to file only if num is greater than threshold
thrshld <- 10
out_dir <- "~/R/tmp/Batch_Saving"

for (i in seq_along(input)) {

  # Subfolder is chosen from the loop index: i = 1..10 -> Dir_Num_1,
  # i = 11..20 -> Dir_Num_2, and so on. Grouping is by index, not by the
  # number of files written, so the per-folder file count varies.
  sub_dir <- paste0(out_dir, "/Dir_Num_", ceiling(i / 10))
  # recursive = TRUE also creates out_dir itself when it is missing; without
  # it dir.create() fails (silently, because warnings are suppressed) and the
  # writeLines() call below errors. showWarnings = FALSE hides the
  # "already exists" warning on the repeated calls.
  dir.create(sub_dir, showWarnings = FALSE, recursive = TRUE)

  if (input[i] > thrshld) {

    txt <- paste("The number", signif(input[i], 4), "is greater than", thrshld)

    # Write text to file
    writeLines(text = txt,
               con = paste0(sub_dir, "/file_", i, ".txt"))

  }

}

子文件夹中的文件数

# For the working directory and every folder below it, count the entries
# that list.files() reports directly inside that folder
sapply(list.dirs(), function(folder) length(list.files(folder)))

#            .  ./Dir_Num_1 ./Dir_Num_10  ./Dir_Num_2  ./Dir_Num_3  ./Dir_Num_4 
#           11            6            4            4            7            6 
#  ./Dir_Num_5  ./Dir_Num_6  ./Dir_Num_7  ./Dir_Num_8  ./Dir_Num_9 
#            7            6            3            4            5 

尝试把用于创建子目录的 k 判断嵌套到 i 循环的阈值检查内部,使 k 只在实际写入文件时才递增,如下所示:

for (i in 1:length(input)) {
  # Values at or below the threshold are skipped and do not advance k
  if (input[i] <= thrshld) {
    next
  }

  # k counts written files (it must be initialised before this loop).
  # When k is 1, 11, 21, ... a new subfolder is started, named after that
  # value of k (Dir_Num_1, Dir_Num_11, Dir_Num_21, ...)
  if (k %in% seq(from = 1, to = length(input), by = 10)) {
    sub_dir <- paste0(out_dir, "/Dir_Num_", k)
  }
  k <- k + 1

  txt <- paste("The number", signif(input[i], 4), "is greater than", thrshld)

  # Create folder if it doesn't exist
  if (!dir.exists(sub_dir)) {
    dir.create(sub_dir, recursive = TRUE)
  }
  # Write text to file, named after the index of the value in `input`
  writeLines(text = txt,
             con = paste0(sub_dir, "/file_", i, ".txt"))
}

Results in the following output

> list.files("~/R/tmp", recursive = T)
 [1] "Dir_Num_1/file_1.txt"   "Dir_Num_1/file_11.txt"  "Dir_Num_1/file_13.txt"  "Dir_Num_1/file_15.txt" 
 [5] "Dir_Num_1/file_18.txt"  "Dir_Num_1/file_2.txt"   "Dir_Num_1/file_4.txt"   "Dir_Num_1/file_5.txt"  
 [9] "Dir_Num_1/file_7.txt"   "Dir_Num_1/file_8.txt"   "Dir_Num_11/file_21.txt" "Dir_Num_11/file_22.txt"
[13] "Dir_Num_11/file_25.txt" "Dir_Num_11/file_26.txt" "Dir_Num_11/file_27.txt" "Dir_Num_11/file_29.txt"
[17] "Dir_Num_11/file_30.txt" "Dir_Num_11/file_32.txt" "Dir_Num_11/file_33.txt" "Dir_Num_11/file_34.txt"
[21] "Dir_Num_21/file_35.txt" "Dir_Num_21/file_36.txt" "Dir_Num_21/file_38.txt" "Dir_Num_21/file_41.txt"
[25] "Dir_Num_21/file_42.txt" "Dir_Num_21/file_43.txt" "Dir_Num_21/file_46.txt" "Dir_Num_21/file_48.txt"
[29] "Dir_Num_21/file_49.txt" "Dir_Num_21/file_50.txt" "Dir_Num_31/file_51.txt" "Dir_Num_31/file_52.txt"
[33] "Dir_Num_31/file_54.txt" "Dir_Num_31/file_55.txt" "Dir_Num_31/file_58.txt" "Dir_Num_31/file_59.txt"
[37] "Dir_Num_31/file_61.txt" "Dir_Num_31/file_68.txt" "Dir_Num_31/file_70.txt" "Dir_Num_31/file_75.txt"
[41] "Dir_Num_41/file_77.txt" "Dir_Num_41/file_78.txt" "Dir_Num_41/file_80.txt" "Dir_Num_41/file_81.txt"
[45] "Dir_Num_41/file_82.txt" "Dir_Num_41/file_83.txt" "Dir_Num_41/file_88.txt" "Dir_Num_41/file_89.txt"
[49] "Dir_Num_41/file_93.txt" "Dir_Num_41/file_94.txt" "Dir_Num_51/file_96.txt" "Dir_Num_51/file_98.txt"

根据 nurandi 的回答,我添加了另一个计数器 c 来统计已写入的文件数;在 if 语句中检查这个计数是否又增加了 10,每当如此就创建一个编号为 k 的新文件夹。
# Simulate 100 normally distributed values (fixed seed for reproducibility)
set.seed(15)
input <- rnorm(100, mean = 10, sd = 3)
# Only values strictly above this threshold are written to disk
thrshld <- 10
out_dir <- "~/R/tmp/Batch_Saving"
k <- 1  # number given to the next subfolder
c <- 1  # 1-based position of the next file among the files actually written
for (i in seq_along(input)) {

  # Values at or below the threshold produce no file and do not advance c
  if (input[i] <= thrshld) {
    next
  }

  # The 1st, 11th, 21st, ... written file starts a new subfolder, so every
  # subfolder receives at most 10 files and folders are numbered 1, 2, 3, ...
  if (c %% 10 == 1) {
    sub_dir <- paste0(out_dir, "/Dir_Num_", k)
    k <- k + 1
  }

  # Create folder if it doesn't exist
  if (!dir.exists(sub_dir)) {
    dir.create(sub_dir, recursive = TRUE)
  }

  txt <- paste("The number", signif(input[i], 4), "is greater than", thrshld)

  # Write text to file, named after the index of the value in `input`
  writeLines(text = txt,
             con = paste0(sub_dir, "/file_", i, ".txt"))

  # One more file has been written
  c <- c + 1
}

这样文件夹就按升序编号了。

# For each immediate subfolder of the working directory, count the entries
# that list.files() reports inside it
sapply(list.dirs(recursive = FALSE), function(folder) length(list.files(folder)))

./Dir_Num_1 ./Dir_Num_2 ./Dir_Num_3 ./Dir_Num_4 
         10          10          10          10 
./Dir_Num_5 ./Dir_Num_6 
         10           2