gsub 一次循环 x 行
gsub for loop x rows at a time
我不确定以前是否有人问过这个问题;如果标题可以措辞得更好,请随时编辑。我有一个需要移动的文件列表。这些文件存储在服务器上,而且非常大,因此移动它们需要一些时间。我每次实际上只能移动 10 个文件,但可以提交多个作业。所以我想用 gsub 和 for 循环来编辑脚本:先生成一个包含前 10 个文件的脚本,再生成一个包含接下来 10 个文件的新脚本,依此类推。下面是我要移动的文件的示例。该列表有 26 个文件……我知道我说过想以 10 个为一组移动,但我也想知道最后一组只有 6 个而不是 10 个时会发生什么。输出文件可以任意命名,这并不重要,但显然必须互不相同。我还附上了我尝试运行的 for 循环;我遇到的困难是如何遍历列表并为每个脚本生成唯一的名称。
# Example input: one `mcli mv` shell command per row, held in a single factor
# column. The column has 26 values, so the compact row names must be
# c(NA, -26L); declaring only 25 rows yields a corrupt data frame.
mcli <- structure(list(`mcli[c(1:25), ]` = structure(1:26, .Label = c("mcli mv --recursive /path/to/directory/D00063/alignment/ /path/to/directory/D00063/new.folder/alignment/",
"mcli mv --recursive /path/to/directory/D00063/bam/ /path/to/directory/D00063/new.folder/bam/",
"mcli mv --recursive /path/to/directory/D00063/fastqc/ /path/to/directory/D00063/new.folder/fastqc/",
"mcli mv --recursive /path/to/directory/D00063/gvcf/ /path/to/directory/D00063/new.folder/gvcf/",
"mcli mv --recursive /path/to/directory/D00063/trim/ /path/to/directory/D00063/new.folder/trim/",
"mcli mv --recursive /path/to/directory/D00064/alignment/ /path/to/directory/D00064/new.folder/alignment/",
"mcli mv --recursive /path/to/directory/D00064/bam/ /path/to/directory/D00064/new.folder/bam/",
"mcli mv --recursive /path/to/directory/D00064/fastqc/ /path/to/directory/D00064/new.folder/fastqc/",
"mcli mv --recursive /path/to/directory/D00064/gvcf/ /path/to/directory/D00064/new.folder/gvcf/",
"mcli mv --recursive /path/to/directory/D00064/trim/ /path/to/directory/D00064/new.folder/trim/",
"mcli mv --recursive /path/to/directory/D00105/gvcf/ /path/to/directory/D00105/new.folder/gvcf/",
"mcli mv --recursive /path/to/directory/D00151/gvcf/ /path/to/directory/D00151/new.folder/gvcf/",
"mcli mv --recursive /path/to/directory/D00188/gvcf/ /path/to/directory/D00188/new.folder/gvcf/",
"mcli mv --recursive /path/to/directory/D002053/D02053/ /path/to/directory/D002053/new.folder/D02053/",
"mcli mv --recursive /path/to/directory/D00220/gvcf/ /path/to/directory/D00220/new.folder/gvcf/",
"mcli mv --recursive /path/to/directory/D00257/gvcf/ /path/to/directory/D00257/new.folder/gvcf/",
"mcli mv --recursive /path/to/directory/D00258/gvcf/ /path/to/directory/D00258/new.folder/gvcf/",
"mcli mv --recursive /path/to/directory/D00264/gvcf/ /path/to/directory/D00264/new.folder/gvcf/",
"mcli mv --recursive /path/to/directory/D00268/alignment/ /path/to/directory/D00268/new.folder/alignment/",
"mcli mv --recursive /path/to/directory/D00268/bam/ /path/to/directory/D00268/new.folder/bam/",
"mcli mv --recursive /path/to/directory/D00268/fastqc/ /path/to/directory/D00268/new.folder/fastqc/",
"mcli mv --recursive /path/to/directory/D00268/gvcf/ /path/to/directory/D00268/new.folder/gvcf/",
"mcli mv --recursive /path/to/directory/D00268/logs/ /path/to/directory/D00268/new.folder/logs/",
"mcli mv --recursive /path/to/directory/D00268/trim/ /path/to/directory/D00268/new.folder/trim/",
"mcli mv --recursive /path/to/directory/D00269/gvcf/ /path/to/directory/D00269/new.folder/gvcf/",
"mcli mv --recursive /path/to/directory/D00270/gvcf/ /path/to/directory/D00270/new.folder/gvcf/"), class = "factor")), .Names = "mcli[c(1:25), ]", row.names = c(NA,
-26L), class = "data.frame")
这是我要编辑的脚本示例
# Slurm batch-script template: one script line per row of column V1. The ten
# `srun` rows carry placeholder tokens that are meant to be replaced by the
# real commands.
slurm <- local({
  sbatch_header <- c(
    "#!/bin/bash -l",
    "#SBATCH --nodes=1",
    "#SBATCH --ntasks-per-node=10 ",
    "#SBATCH --mem-per-cpu=5gb",
    "#SBATCH -t 20:00:00",
    "#SBATCH --mail-type=ALL",
    "#SBATCH --mail-user=email ",
    "#SBATCH -o %j.out",
    "#SBATCH -e %j.err"
  )
  srun_rows <- c(
    "srun --exclusive --ntasks 1 abc123 &",
    "srun --exclusive --ntasks 1 def456 &",
    "srun --exclusive --ntasks 1 ghi789 &",
    "srun --exclusive --ntasks 1 jkl101112 &",
    "srun --exclusive --ntasks 1 mno131415 &",
    "srun --exclusive --ntasks 1 pqr161718 &",
    "srun --exclusive --ntasks 1 stu192021 &",
    "srun --exclusive --ntasks 1 vwx222324 &",
    "srun --exclusive --ntasks 1 yza252627 &",
    "srun --exclusive --ntasks 1 abc282930 &"
  )
  script_lines <- c(
    sbatch_header,
    "", "", "cd $SLURM_SUBMIT_DIR", "",
    srun_rows,
    "", "wait"
  )
  structure(
    list(V1 = script_lines),
    row.names = c(NA, -25L),
    class = c("data.table", "data.frame")
  )
})
无效的示例代码
# Write one Slurm script per batch of commands, filling the template's
# placeholder rows with the commands of that batch.
#
# Fixes over the first attempt:
#   * the commands are taken by position and converted from factor to
#     character (the data frame has no column called "V1");
#   * the loop runs once per batch, and each placeholder receives a different
#     command instead of every placeholder receiving the same one;
#   * placeholder rows left over in a short final batch are dropped;
#   * output files are named by batch number, not by the command text (which
#     contains "/" and spaces).
placeholders <- c(
  "abc123", "def456", "ghi789", "jkl101112", "mno131415",
  "pqr161718", "stu192021", "vwx222324", "yza252627", "abc282930"
)
commands <- as.character(mcli[[1]])
template <- as.data.frame(slurm)[["V1"]]

# Row of the template holding each placeholder token.
job_rows <- vapply(
  placeholders,
  function(token) grep(token, template, fixed = TRUE)[1L],
  integer(1)
)
stopifnot(!anyNA(job_rows))

# One batch per template's worth of placeholders (10 here).
batch_size <- length(placeholders)
batches <- split(commands, (seq_along(commands) - 1L) %/% batch_size)

for (i in seq_along(batches)) {
  batch <- batches[[i]]
  script <- template
  for (j in seq_along(batch)) {
    row <- job_rows[[j]]
    # fixed = TRUE: treat both the token and the command as literal text.
    script[row] <- sub(placeholders[j], batch[j], script[row], fixed = TRUE)
  }
  unused_rows <- job_rows[seq_along(job_rows) > length(batch)]
  if (length(unused_rows) > 0L) {
    script <- script[-unused_rows]
  }
  out_file <- file.path("/path/to/random/folder", paste0("batch_", i, ".csh"))
  writeLines(script, out_file)
}
预期输出脚本 1
#!/bin/bash -l
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=10
#SBATCH --mem-per-cpu=5gb
#SBATCH -t 20:00:00
#SBATCH --mail-type=ALL
#SBATCH --mail-user=bgwillia@umn.edu
#SBATCH -o %j.out
#SBATCH -e %j.err
cd $SLURM_SUBMIT_DIR
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00063/alignment/ /path/to/directory/D00063/new.folder/alignment/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00063/bam/ /path/to/directory/D00063/new.folder/bam/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00063/fastqc/ /path/to/directory/D00063/new.folder/fastqc/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00063/gvcf/ /path/to/directory/D00063/new.folder/gvcf/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00063/trim/ /path/to/directory/D00063/new.folder/trim/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00064/alignment/ /path/to/directory/D00064/new.folder/alignment/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00064/bam/ /path/to/directory/D00064/new.folder/bam/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00064/fastqc/ /path/to/directory/D00064/new.folder/fastqc/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00064/gvcf/ /path/to/directory/D00064/new.folder/gvcf/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00064/trim/ /path/to/directory/D00064/new.folder/trim/ &
wait
预期输出脚本 2
#!/bin/bash -l
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=10
#SBATCH --mem-per-cpu=5gb
#SBATCH -t 20:00:00
#SBATCH --mail-type=ALL
#SBATCH --mail-user=bgwillia@umn.edu
#SBATCH -o %j.out
#SBATCH -e %j.err
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00105/gvcf/ /path/to/directory/D00105/new.folder/gvcf/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00151/gvcf/ /path/to/directory/D00151/new.folder/gvcf/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00188/gvcf/ /path/to/directory/D00188/new.folder/gvcf/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D002053/D02053/ /path/to/directory/D002053/new.folder/D02053/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00220/gvcf/ /path/to/directory/D00220/new.folder/gvcf/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00257/gvcf/ /path/to/directory/D00257/new.folder/gvcf/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00258/gvcf/ /path/to/directory/D00258/new.folder/gvcf/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00264/gvcf/ /path/to/directory/D00264/new.folder/gvcf/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00268/alignment/ /path/to/directory/D00268/new.folder/alignment/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00268/bam/ /path/to/directory/D00268/new.folder/bam/ &
wait
预期输出脚本 3
#!/bin/bash -l
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=10
#SBATCH --mem-per-cpu=5gb
#SBATCH -t 20:00:00
#SBATCH --mail-type=ALL
#SBATCH --mail-user=bgwillia@umn.edu
#SBATCH -o %j.out
#SBATCH -e %j.err
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00268/fastqc/ /path/to/directory/D00268/new.folder/fastqc/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00268/gvcf/ /path/to/directory/D00268/new.folder/gvcf/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00268/logs/ /path/to/directory/D00268/new.folder/logs/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00268/trim/ /path/to/directory/D00268/new.folder/trim/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00269/gvcf/ /path/to/directory/D00269/new.folder/gvcf/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00270/gvcf/ /path/to/directory/D00270/new.folder/gvcf/ &
wait
我将对要运行的命令组成的向量进行操作,而不是对数据框进行操作:
# Commands to run, one complete shell command per element. Every command must
# stay on a single line: a line break inside a string literal becomes part of
# the command and would split it in the generated shell script.
mcli <- c(
  "mcli mv --recursive /path/to/directory/D00063/alignment/ /path/to/directory/D00063/new.folder/alignment/",
  "mcli mv --recursive /path/to/directory/D00063/bam/ /path/to/directory/D00063/new.folder/bam/",
  "mcli mv --recursive /path/to/directory/D00063/fastqc/ /path/to/directory/D00063/new.folder/fastqc/",
  "mcli mv --recursive /path/to/directory/D00063/gvcf/ /path/to/directory/D00063/new.folder/gvcf/",
  "mcli mv --recursive /path/to/directory/D00063/trim/ /path/to/directory/D00063/new.folder/trim/",
  "mcli mv --recursive /path/to/directory/D00064/alignment/ /path/to/directory/D00064/new.folder/alignment/",
  "mcli mv --recursive /path/to/directory/D00064/bam/ /path/to/directory/D00064/new.folder/bam/",
  "mcli mv --recursive /path/to/directory/D00064/fastqc/ /path/to/directory/D00064/new.folder/fastqc/",
  "mcli mv --recursive /path/to/directory/D00064/gvcf/ /path/to/directory/D00064/new.folder/gvcf/",
  "mcli mv --recursive /path/to/directory/D00064/trim/ /path/to/directory/D00064/new.folder/trim/",
  "mcli mv --recursive /path/to/directory/D00105/gvcf/ /path/to/directory/D00105/new.folder/gvcf/",
  "mcli mv --recursive /path/to/directory/D00151/gvcf/ /path/to/directory/D00151/new.folder/gvcf/",
  "mcli mv --recursive /path/to/directory/D00188/gvcf/ /path/to/directory/D00188/new.folder/gvcf/",
  "mcli mv --recursive /path/to/directory/D002053/D02053/ /path/to/directory/D002053/new.folder/D02053/",
  "mcli mv --recursive /path/to/directory/D00220/gvcf/ /path/to/directory/D00220/new.folder/gvcf/",
  "mcli mv --recursive /path/to/directory/D00257/gvcf/ /path/to/directory/D00257/new.folder/gvcf/",
  "mcli mv --recursive /path/to/directory/D00258/gvcf/ /path/to/directory/D00258/new.folder/gvcf/",
  "mcli mv --recursive /path/to/directory/D00264/gvcf/ /path/to/directory/D00264/new.folder/gvcf/",
  "mcli mv --recursive /path/to/directory/D00268/alignment/ /path/to/directory/D00268/new.folder/alignment/",
  "mcli mv --recursive /path/to/directory/D00268/bam/ /path/to/directory/D00268/new.folder/bam/",
  "mcli mv --recursive /path/to/directory/D00268/fastqc/ /path/to/directory/D00268/new.folder/fastqc/",
  "mcli mv --recursive /path/to/directory/D00268/gvcf/ /path/to/directory/D00268/new.folder/gvcf/",
  "mcli mv --recursive /path/to/directory/D00268/logs/ /path/to/directory/D00268/new.folder/logs/",
  "mcli mv --recursive /path/to/directory/D00268/trim/ /path/to/directory/D00268/new.folder/trim/",
  "mcli mv --recursive /path/to/directory/D00269/gvcf/ /path/to/directory/D00269/new.folder/gvcf/",
  "mcli mv --recursive /path/to/directory/D00270/gvcf/ /path/to/directory/D00270/new.folder/gvcf/"
)
我这样做有以下三个原因:(1) 我收到了 `corrupt data frame: columns will be truncated or padded with NAs` 的警告;(2) 这些值是 `factor`,而我们其实只需要 `character`(我知道这很容易修复);(3) 我希望这种做法可以推广到任何其他列或向量。
我会稍微修改一下您的 `slurm` 模板,改写为:
# Script template in three pieces:
#   bef - everything before the job lines (header and `cd`),
#   job - sprintf() format for a single job line (%s is the command),
#   aft - everything after the job lines.
slurm <- local({
  preamble <- c(
    "#!/bin/bash -l",
    "#SBATCH --nodes=1",
    "#SBATCH --ntasks-per-node=10 ",
    "#SBATCH --mem-per-cpu=5gb",
    "#SBATCH -t 20:00:00",
    "#SBATCH --mail-type=ALL",
    "#SBATCH --mail-user=email ",
    "#SBATCH -o %j.out",
    "#SBATCH -e %j.err",
    "",
    "",
    "cd $SLURM_SUBMIT_DIR"
  )
  list(
    bef = paste(preamble, collapse = "\n"),
    job = "srun --exclusive --ntasks 1 %s &",
    aft = "\n\nwait\n"
  )
})
从这里开始:
# Render one complete batch script per group of `batch_size` commands.
# The result is a character vector with one element per batch (the last batch
# may be shorter), so `scripts[[i]]` is the full text of script i.
scripts <- local({
  batch_size <- 10L
  # Accept a character vector, a factor, or a one-column data frame.
  commands <- as.character(unlist(mcli, use.names = FALSE))
  batch_id <- (seq_along(commands) - 1L) %/% batch_size
  vapply(
    split(commands, batch_id),
    function(batch) {
      job_lines <- paste(sprintf(slurm$job, batch), collapse = "\n")
      paste(slurm$bef, job_lines, slurm$aft, sep = "\n")
    },
    character(1)
  )
})
`scripts` 的每个元素都是一个完整的批处理文件,最多包含 10 个作业。例如,最后一个是:
> cat(scripts[[3]], "\n")
#!/bin/bash -l
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=10
#SBATCH --mem-per-cpu=5gb
#SBATCH -t 20:00:00
#SBATCH --mail-type=ALL
#SBATCH --mail-user=email
#SBATCH -o %j.out
#SBATCH -e %j.err
cd $SLURM_SUBMIT_DIR
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00268/fastqc/ /path/to/directory/D00268/new.folder/fastqc/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00268/gvcf/ /path/to/directory/D00268/new.folder/gvcf/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00268/logs/ /path/to/directory/D00268/new.folder/logs/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00268/trim/ /path/to/directory/D00268/new.folder/trim/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00269/gvcf/ /path/to/directory/D00269/new.folder/gvcf/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00270/gvcf/ /path/to/directory/D00270/new.folder/gvcf/ &
wait
如果您需要将它们作为单独的文件,您可以执行以下操作:
# Save every rendered script to its own file: script1, script2, ...
for (idx in seq_along(scripts)) {
  out_path <- sprintf("script%d", idx)
  writeLines(text = scripts[[idx]], con = out_path)
}
我不确定以前是否有人问过这个问题;如果标题可以措辞得更好,请随时编辑。我有一个需要移动的文件列表。这些文件存储在服务器上,而且非常大,因此移动它们需要一些时间。我每次实际上只能移动 10 个文件,但可以提交多个作业。所以我想用 gsub 和 for 循环来编辑脚本:先生成一个包含前 10 个文件的脚本,再生成一个包含接下来 10 个文件的新脚本,依此类推。下面是我要移动的文件的示例。该列表有 26 个文件……我知道我说过想以 10 个为一组移动,但我也想知道最后一组只有 6 个而不是 10 个时会发生什么。输出文件可以任意命名,这并不重要,但显然必须互不相同。我还附上了我尝试运行的 for 循环;我遇到的困难是如何遍历列表并为每个脚本生成唯一的名称。
# Example input: one `mcli mv` shell command per row, held in a single factor
# column. The column has 26 values, so the compact row names must be
# c(NA, -26L); declaring only 25 rows yields a corrupt data frame.
mcli <- structure(list(`mcli[c(1:25), ]` = structure(1:26, .Label = c("mcli mv --recursive /path/to/directory/D00063/alignment/ /path/to/directory/D00063/new.folder/alignment/",
"mcli mv --recursive /path/to/directory/D00063/bam/ /path/to/directory/D00063/new.folder/bam/",
"mcli mv --recursive /path/to/directory/D00063/fastqc/ /path/to/directory/D00063/new.folder/fastqc/",
"mcli mv --recursive /path/to/directory/D00063/gvcf/ /path/to/directory/D00063/new.folder/gvcf/",
"mcli mv --recursive /path/to/directory/D00063/trim/ /path/to/directory/D00063/new.folder/trim/",
"mcli mv --recursive /path/to/directory/D00064/alignment/ /path/to/directory/D00064/new.folder/alignment/",
"mcli mv --recursive /path/to/directory/D00064/bam/ /path/to/directory/D00064/new.folder/bam/",
"mcli mv --recursive /path/to/directory/D00064/fastqc/ /path/to/directory/D00064/new.folder/fastqc/",
"mcli mv --recursive /path/to/directory/D00064/gvcf/ /path/to/directory/D00064/new.folder/gvcf/",
"mcli mv --recursive /path/to/directory/D00064/trim/ /path/to/directory/D00064/new.folder/trim/",
"mcli mv --recursive /path/to/directory/D00105/gvcf/ /path/to/directory/D00105/new.folder/gvcf/",
"mcli mv --recursive /path/to/directory/D00151/gvcf/ /path/to/directory/D00151/new.folder/gvcf/",
"mcli mv --recursive /path/to/directory/D00188/gvcf/ /path/to/directory/D00188/new.folder/gvcf/",
"mcli mv --recursive /path/to/directory/D002053/D02053/ /path/to/directory/D002053/new.folder/D02053/",
"mcli mv --recursive /path/to/directory/D00220/gvcf/ /path/to/directory/D00220/new.folder/gvcf/",
"mcli mv --recursive /path/to/directory/D00257/gvcf/ /path/to/directory/D00257/new.folder/gvcf/",
"mcli mv --recursive /path/to/directory/D00258/gvcf/ /path/to/directory/D00258/new.folder/gvcf/",
"mcli mv --recursive /path/to/directory/D00264/gvcf/ /path/to/directory/D00264/new.folder/gvcf/",
"mcli mv --recursive /path/to/directory/D00268/alignment/ /path/to/directory/D00268/new.folder/alignment/",
"mcli mv --recursive /path/to/directory/D00268/bam/ /path/to/directory/D00268/new.folder/bam/",
"mcli mv --recursive /path/to/directory/D00268/fastqc/ /path/to/directory/D00268/new.folder/fastqc/",
"mcli mv --recursive /path/to/directory/D00268/gvcf/ /path/to/directory/D00268/new.folder/gvcf/",
"mcli mv --recursive /path/to/directory/D00268/logs/ /path/to/directory/D00268/new.folder/logs/",
"mcli mv --recursive /path/to/directory/D00268/trim/ /path/to/directory/D00268/new.folder/trim/",
"mcli mv --recursive /path/to/directory/D00269/gvcf/ /path/to/directory/D00269/new.folder/gvcf/",
"mcli mv --recursive /path/to/directory/D00270/gvcf/ /path/to/directory/D00270/new.folder/gvcf/"), class = "factor")), .Names = "mcli[c(1:25), ]", row.names = c(NA,
-26L), class = "data.frame")
这是我要编辑的脚本示例
# Slurm batch-script template: one script line per row of column V1. The ten
# `srun` rows carry placeholder tokens that are meant to be replaced by the
# real commands.
slurm <- local({
  sbatch_header <- c(
    "#!/bin/bash -l",
    "#SBATCH --nodes=1",
    "#SBATCH --ntasks-per-node=10 ",
    "#SBATCH --mem-per-cpu=5gb",
    "#SBATCH -t 20:00:00",
    "#SBATCH --mail-type=ALL",
    "#SBATCH --mail-user=email ",
    "#SBATCH -o %j.out",
    "#SBATCH -e %j.err"
  )
  srun_rows <- c(
    "srun --exclusive --ntasks 1 abc123 &",
    "srun --exclusive --ntasks 1 def456 &",
    "srun --exclusive --ntasks 1 ghi789 &",
    "srun --exclusive --ntasks 1 jkl101112 &",
    "srun --exclusive --ntasks 1 mno131415 &",
    "srun --exclusive --ntasks 1 pqr161718 &",
    "srun --exclusive --ntasks 1 stu192021 &",
    "srun --exclusive --ntasks 1 vwx222324 &",
    "srun --exclusive --ntasks 1 yza252627 &",
    "srun --exclusive --ntasks 1 abc282930 &"
  )
  script_lines <- c(
    sbatch_header,
    "", "", "cd $SLURM_SUBMIT_DIR", "",
    srun_rows,
    "", "wait"
  )
  structure(
    list(V1 = script_lines),
    row.names = c(NA, -25L),
    class = c("data.table", "data.frame")
  )
})
无效的示例代码
# Write one Slurm script per batch of commands, filling the template's
# placeholder rows with the commands of that batch.
#
# Fixes over the first attempt:
#   * the commands are taken by position and converted from factor to
#     character (the data frame has no column called "V1");
#   * the loop runs once per batch, and each placeholder receives a different
#     command instead of every placeholder receiving the same one;
#   * placeholder rows left over in a short final batch are dropped;
#   * output files are named by batch number, not by the command text (which
#     contains "/" and spaces).
placeholders <- c(
  "abc123", "def456", "ghi789", "jkl101112", "mno131415",
  "pqr161718", "stu192021", "vwx222324", "yza252627", "abc282930"
)
commands <- as.character(mcli[[1]])
template <- as.data.frame(slurm)[["V1"]]

# Row of the template holding each placeholder token.
job_rows <- vapply(
  placeholders,
  function(token) grep(token, template, fixed = TRUE)[1L],
  integer(1)
)
stopifnot(!anyNA(job_rows))

# One batch per template's worth of placeholders (10 here).
batch_size <- length(placeholders)
batches <- split(commands, (seq_along(commands) - 1L) %/% batch_size)

for (i in seq_along(batches)) {
  batch <- batches[[i]]
  script <- template
  for (j in seq_along(batch)) {
    row <- job_rows[[j]]
    # fixed = TRUE: treat both the token and the command as literal text.
    script[row] <- sub(placeholders[j], batch[j], script[row], fixed = TRUE)
  }
  unused_rows <- job_rows[seq_along(job_rows) > length(batch)]
  if (length(unused_rows) > 0L) {
    script <- script[-unused_rows]
  }
  out_file <- file.path("/path/to/random/folder", paste0("batch_", i, ".csh"))
  writeLines(script, out_file)
}
预期输出脚本 1
#!/bin/bash -l
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=10
#SBATCH --mem-per-cpu=5gb
#SBATCH -t 20:00:00
#SBATCH --mail-type=ALL
#SBATCH --mail-user=bgwillia@umn.edu
#SBATCH -o %j.out
#SBATCH -e %j.err
cd $SLURM_SUBMIT_DIR
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00063/alignment/ /path/to/directory/D00063/new.folder/alignment/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00063/bam/ /path/to/directory/D00063/new.folder/bam/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00063/fastqc/ /path/to/directory/D00063/new.folder/fastqc/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00063/gvcf/ /path/to/directory/D00063/new.folder/gvcf/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00063/trim/ /path/to/directory/D00063/new.folder/trim/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00064/alignment/ /path/to/directory/D00064/new.folder/alignment/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00064/bam/ /path/to/directory/D00064/new.folder/bam/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00064/fastqc/ /path/to/directory/D00064/new.folder/fastqc/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00064/gvcf/ /path/to/directory/D00064/new.folder/gvcf/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00064/trim/ /path/to/directory/D00064/new.folder/trim/ &
wait
预期输出脚本 2
#!/bin/bash -l
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=10
#SBATCH --mem-per-cpu=5gb
#SBATCH -t 20:00:00
#SBATCH --mail-type=ALL
#SBATCH --mail-user=bgwillia@umn.edu
#SBATCH -o %j.out
#SBATCH -e %j.err
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00105/gvcf/ /path/to/directory/D00105/new.folder/gvcf/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00151/gvcf/ /path/to/directory/D00151/new.folder/gvcf/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00188/gvcf/ /path/to/directory/D00188/new.folder/gvcf/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D002053/D02053/ /path/to/directory/D002053/new.folder/D02053/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00220/gvcf/ /path/to/directory/D00220/new.folder/gvcf/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00257/gvcf/ /path/to/directory/D00257/new.folder/gvcf/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00258/gvcf/ /path/to/directory/D00258/new.folder/gvcf/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00264/gvcf/ /path/to/directory/D00264/new.folder/gvcf/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00268/alignment/ /path/to/directory/D00268/new.folder/alignment/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00268/bam/ /path/to/directory/D00268/new.folder/bam/ &
wait
预期输出脚本 3
#!/bin/bash -l
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=10
#SBATCH --mem-per-cpu=5gb
#SBATCH -t 20:00:00
#SBATCH --mail-type=ALL
#SBATCH --mail-user=bgwillia@umn.edu
#SBATCH -o %j.out
#SBATCH -e %j.err
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00268/fastqc/ /path/to/directory/D00268/new.folder/fastqc/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00268/gvcf/ /path/to/directory/D00268/new.folder/gvcf/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00268/logs/ /path/to/directory/D00268/new.folder/logs/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00268/trim/ /path/to/directory/D00268/new.folder/trim/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00269/gvcf/ /path/to/directory/D00269/new.folder/gvcf/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00270/gvcf/ /path/to/directory/D00270/new.folder/gvcf/ &
wait
我将对 运行:
# Operate on a plain character vector of the commands to run, not a data frame.
# Every command must stay on a single line: a line break inside a string
# literal becomes part of the command and would split it in the generated
# shell script.
mcli <- c(
  "mcli mv --recursive /path/to/directory/D00063/alignment/ /path/to/directory/D00063/new.folder/alignment/",
  "mcli mv --recursive /path/to/directory/D00063/bam/ /path/to/directory/D00063/new.folder/bam/",
  "mcli mv --recursive /path/to/directory/D00063/fastqc/ /path/to/directory/D00063/new.folder/fastqc/",
  "mcli mv --recursive /path/to/directory/D00063/gvcf/ /path/to/directory/D00063/new.folder/gvcf/",
  "mcli mv --recursive /path/to/directory/D00063/trim/ /path/to/directory/D00063/new.folder/trim/",
  "mcli mv --recursive /path/to/directory/D00064/alignment/ /path/to/directory/D00064/new.folder/alignment/",
  "mcli mv --recursive /path/to/directory/D00064/bam/ /path/to/directory/D00064/new.folder/bam/",
  "mcli mv --recursive /path/to/directory/D00064/fastqc/ /path/to/directory/D00064/new.folder/fastqc/",
  "mcli mv --recursive /path/to/directory/D00064/gvcf/ /path/to/directory/D00064/new.folder/gvcf/",
  "mcli mv --recursive /path/to/directory/D00064/trim/ /path/to/directory/D00064/new.folder/trim/",
  "mcli mv --recursive /path/to/directory/D00105/gvcf/ /path/to/directory/D00105/new.folder/gvcf/",
  "mcli mv --recursive /path/to/directory/D00151/gvcf/ /path/to/directory/D00151/new.folder/gvcf/",
  "mcli mv --recursive /path/to/directory/D00188/gvcf/ /path/to/directory/D00188/new.folder/gvcf/",
  "mcli mv --recursive /path/to/directory/D002053/D02053/ /path/to/directory/D002053/new.folder/D02053/",
  "mcli mv --recursive /path/to/directory/D00220/gvcf/ /path/to/directory/D00220/new.folder/gvcf/",
  "mcli mv --recursive /path/to/directory/D00257/gvcf/ /path/to/directory/D00257/new.folder/gvcf/",
  "mcli mv --recursive /path/to/directory/D00258/gvcf/ /path/to/directory/D00258/new.folder/gvcf/",
  "mcli mv --recursive /path/to/directory/D00264/gvcf/ /path/to/directory/D00264/new.folder/gvcf/",
  "mcli mv --recursive /path/to/directory/D00268/alignment/ /path/to/directory/D00268/new.folder/alignment/",
  "mcli mv --recursive /path/to/directory/D00268/bam/ /path/to/directory/D00268/new.folder/bam/",
  "mcli mv --recursive /path/to/directory/D00268/fastqc/ /path/to/directory/D00268/new.folder/fastqc/",
  "mcli mv --recursive /path/to/directory/D00268/gvcf/ /path/to/directory/D00268/new.folder/gvcf/",
  "mcli mv --recursive /path/to/directory/D00268/logs/ /path/to/directory/D00268/new.folder/logs/",
  "mcli mv --recursive /path/to/directory/D00268/trim/ /path/to/directory/D00268/new.folder/trim/",
  "mcli mv --recursive /path/to/directory/D00269/gvcf/ /path/to/directory/D00269/new.folder/gvcf/",
  "mcli mv --recursive /path/to/directory/D00270/gvcf/ /path/to/directory/D00270/new.folder/gvcf/"
)
我这样做有以下三个原因:(1) 我收到了 `corrupt data frame: columns will be truncated or padded with NAs` 的警告;(2) 这些值是 `factor`,而我们其实只需要 `character`(我知道这很容易修复);(3) 我希望这种做法可以推广到任何其他列或向量。
我会稍微修改一下您的 `slurm` 模板,改写为:
# Script template in three pieces:
#   bef - everything before the job lines (header and `cd`),
#   job - sprintf() format for a single job line (%s is the command),
#   aft - everything after the job lines.
slurm <- local({
  preamble <- c(
    "#!/bin/bash -l",
    "#SBATCH --nodes=1",
    "#SBATCH --ntasks-per-node=10 ",
    "#SBATCH --mem-per-cpu=5gb",
    "#SBATCH -t 20:00:00",
    "#SBATCH --mail-type=ALL",
    "#SBATCH --mail-user=email ",
    "#SBATCH -o %j.out",
    "#SBATCH -e %j.err",
    "",
    "",
    "cd $SLURM_SUBMIT_DIR"
  )
  list(
    bef = paste(preamble, collapse = "\n"),
    job = "srun --exclusive --ntasks 1 %s &",
    aft = "\n\nwait\n"
  )
})
从这里开始:
# Render one complete batch script per group of `batch_size` commands.
# The result is a character vector with one element per batch (the last batch
# may be shorter), so `scripts[[i]]` is the full text of script i.
scripts <- local({
  batch_size <- 10L
  # Accept a character vector, a factor, or a one-column data frame.
  commands <- as.character(unlist(mcli, use.names = FALSE))
  batch_id <- (seq_along(commands) - 1L) %/% batch_size
  vapply(
    split(commands, batch_id),
    function(batch) {
      job_lines <- paste(sprintf(slurm$job, batch), collapse = "\n")
      paste(slurm$bef, job_lines, slurm$aft, sep = "\n")
    },
    character(1)
  )
})
`scripts` 的每个元素都是一个完整的批处理文件,最多包含 10 个作业。例如,最后一个是:
> cat(scripts[[3]], "\n")
#!/bin/bash -l
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=10
#SBATCH --mem-per-cpu=5gb
#SBATCH -t 20:00:00
#SBATCH --mail-type=ALL
#SBATCH --mail-user=email
#SBATCH -o %j.out
#SBATCH -e %j.err
cd $SLURM_SUBMIT_DIR
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00268/fastqc/ /path/to/directory/D00268/new.folder/fastqc/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00268/gvcf/ /path/to/directory/D00268/new.folder/gvcf/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00268/logs/ /path/to/directory/D00268/new.folder/logs/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00268/trim/ /path/to/directory/D00268/new.folder/trim/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00269/gvcf/ /path/to/directory/D00269/new.folder/gvcf/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00270/gvcf/ /path/to/directory/D00270/new.folder/gvcf/ &
wait
如果您需要将它们作为单独的文件,您可以执行以下操作:
# Save every rendered script to its own file: script1, script2, ...
for (idx in seq_along(scripts)) {
  out_path <- sprintf("script%d", idx)
  writeLines(text = scripts[[idx]], con = out_path)
}