如何在 R 中执行脚本的循环和迭代?
How to perform looping and iteration of a script in R?
我正在 运行使用 R 脚本来分析一些生物数据。下面提供了片段数据和脚本的示例。这个数据文件有很多列(但我想关注第 5 列 - 基因)。我有超过 100 个这样的数据文件(将所有文件中的第 5 基因列视为感兴趣的列)。目前,我 运行 在 R 中分别设置每个文件,这个过程很乏味。我想 运行 同时为所有数据文件创建一个 R 脚本,并根据文件名将所有数据保存在不同的文件夹中。是否可以在脚本中一次循环或迭代并分析所有数据文件。请协助我。
谢谢,
图菲克
格式化问题和修改后的数据框
已插入:要读取的文件名
M3.1_IPA.txt
M8.1_IPA.txt
M8.2_IPA.txt
M8.3_IPA.txt
1.加载基因集进行分析
Data_file <- read.delim(file = "./M3.1_IPA.txt", stringsAsFactors = FALSE, check.names = FALSE, row.names = NULL)
dput(head(Data_file, 5))
structure(list(row.names = c("M3.1_ALPP", "M3.1_ALS2CR14", "M3.1_ANKRD30B",
"M3.1_ARL16", "M3.1_BCYRN1"), X = 1:5, Module = c("M3.1", "M3.1",
"M3.1", "M3.1", "M3.1"), Title = c("Cell Cycle", "Cell Cycle",
"Cell Cycle", "Cell Cycle", "Cell Cycle"), Gene = c("ALPP", "ALS2CR14",
"ANKRD30B", "ARL16", "BCYRN1"), Probes = c("ILMN_1693789", "ILMN_1787314",
"ILMN_1730678", "ILMN_2188119", "ILMN_1678757"), Module_gene = c("M3.1_ALPP",
"M3.1_ALS2CR14", "M3.1_ANKRD30B", "M3.1_ARL16", "M3.1_BCYRN1"
)), row.names = c(NA, 5L), class = "data.frame")
Module_10_1 <- Data_file[,5]
Module_10_1_geneLists <- list(Module_10_1_geneListName=Module_10_1)
Select 要使用的主题数据库(即有机体和 TSS 周围的距离)
data(motifAnnotations_hgnc)
motifRankings <- importRankings("./hg19-tss-centered-10kb-7species.mc9nr.feather")
计算浓缩度
motifs_AUC <- calcAUC(Module_10_1_geneLists, motifRankings, nCores=1)
Type = "Enrichment"
Module = "M10_1"
auc <- getAUC(motifs_AUC)
##Export the plots##
pdf(paste0(Type, "_", Module, "_AUC_Histogram_Plot.pdf"), height = 5, width = 10)
hist(auc, main="Module_10_1", xlab="AUC histogram",
breaks=100, col="#ff000050", border="darkred")
nes3 <- (3*sd(auc)) + mean(auc)
abline(v=nes3, col="red")
dev.off()
Select 重要图案 and/or 注释到 TFs
motifEnrichmentTable <- addMotifAnnotation(motifs_AUC, nesThreshold=3,
motifAnnot=motifAnnotations_hgnc)
##Export the Motif enrichment analysis results###
write.csv(motifEnrichmentTable, file ="./motifEnrichmentTable.csv", row.names = F, sep = ",")
确定每个基序的最佳富集基因
motifEnrichmentTable_wGenes_v1 <- addSignificantGenes(motifEnrichmentTable,
rankings=motifRankings,
geneSets=Module_10_1_geneLists)
##Export the Motif enrichment analysis results##
write.csv(motifEnrichmentTable_wGenes_v1,
file ="./motifEnrichmentTable_wGenes_v1.csv",
row.names = F, sep = ",")
几个主题的情节
geneSetName <- "Module_10_1_Genelist"
selectedMotifs <- c("cisbp__M6275",
sample(motifEnrichmentTable$motif, 2))
par(mfrow=c(2,2))
Type_v1 <- "Few_motifs"
##Export the plots
pdf(paste0(Type_v1, "_", Module, "_Sig_Genes_Plot.pdf"), height = 5, width = 5)
getSignificantGenes(Module_10_1_geneLists,
motifRankings,
signifRankingNames=selectedMotifs,
plotCurve=TRUE, maxRank=5000, genesFormat="none",
method="aprox")
dev.off()
RcisTarget的最终输出是一个data.table并导出到html
motifEnrichmentTable_wGenes_v1_wLogo <- addLogo(motifEnrichmentTable_wGenes_v1)
resultsSubset <- motifEnrichmentTable_wGenes_v1_wLogo[1:147,]
Type_v2 <- "Motifs_Table"
library(DT)
## export the data table to html file format###
dtable <- datatable(resultsSubset[,-c("enrichedGenes", "TF_lowConf"), with=FALSE],
escape = FALSE, # To show the logo
filter="top", options=list(pageLength=100))
html_test <- "dtable.html"
saveWidget(dtable, html_test)
构建网络并导出为 html 格式
signifMotifNames <- motifEnrichmentTable$motif[1:3]
incidenceMatrix <- getSignificantGenes(Module_10_1_geneLists,
motifRankings,
signifRankingNames=signifMotifNames,
plotCurve=TRUE, maxRank=5000,
genesFormat="incidMatrix",
method="aprox")$incidMatrix
library(reshape2)
edges <- melt(incidenceMatrix)
edges <- edges[which(edges[,3]==1),1:2]
colnames(edges) <- c("from","to")
library(visNetwork)
motifs <- unique(as.character(edges[,1]))
genes <- unique(as.character(edges[,2]))
nodes <- data.frame(id=c(motifs, genes),
label=c(motifs, genes),
title=c(motifs, genes), # tooltip
shape=c(rep("diamond", length(motifs)), rep("elypse", length(genes))),
color=c(rep("purple", length(motifs)), rep("skyblue", length(genes))))
Network <- visNetwork(nodes, edges) %>% visOptions(highlightNearest = TRUE,
nodesIdSelection = TRUE)
##Export the network##
visSave(Network, file ="Network.html")
我是根据给定的示例文件名编写的:
file_names <- c("M3.1_IPA.txt","M8.1_IPA.txt","M8.2_IPA.txt","M8.3_IPA.txt")
最简单的方法是迭代 1:length(file_names)
,并为每次迭代创建一组唯一的输出文件。根据您的问题,您想将结果保存到不同的文件夹中。这可以通过提取文件名(删除 .txt)然后使用该名称创建一个新目录来完成。然后您可以将此迭代的所有输出保存到新目录
然后会为下一次迭代创建一个新目录,因此您不会保存以前的结果。
for(i in 1:length(file_names)){
Data_file <- read.csv(file = paste0("./",file_names[i]), stringsAsFactors = FALSE, check.names = FALSE)
Module_10_1 <- Data_file[,1]
Module_10_1_geneLists <- list(Module_10_1_geneListName=Module_10_1)
data(motifAnnotations_hgnc)
motifRankings <- importRankings("./hg19-tss-centered-10kb-7species.mc9nr.feather")
motifs_AUC <- calcAUC(Module_10_1_geneLists, motifRankings, nCores=1)
Type = "Enrichment"
Module = "M10_1"
auc <- getAUC(motifs_AUC)
# Extract the file name without the extension, so the directory with the results
# will correspond to this name:
new_directory_name <- gsub(x = file_names[i],pattern = "(.*)\.txt","\1")
#Create the new directory:
dir.create(new_directory_name)
##Export the plots##
# Save to the new directory
pdf(paste0(new_directory_name,"/",Type,"_", Module,"_AUC_Histogram_Plot.pdf"), height = 5, width = 10)
hist(auc, main="Module_10_1", xlab="AUC histogram",
breaks=100, col="#ff000050", border="darkred")
nes3 <- (3*sd(auc)) + mean(auc)
abline(v=nes3, col="red")
dev.off()
}
我省略了你脚本的其余部分,因为保存 motifEnrichmentTable_wGenes_v1.csv
、_Sig_Genes_Plot.pdf
和其他输出的解决方案与上面 _AUC_Histogram_Plot.pdf
相同。您只需要使用 paste0(new_directory_name,"/",<insert-output-name>)
.
将这些输出写入新目录
如果您有很多文件,您可以在本地目录中搜索与正确模式匹配的文件,而不是手动创建向量 file_names
。例如
all_files <- dir()
file_names <- grep(all_files,pattern = "*.txt$",value = TRUE)
将 return 本地目录中的所有 .txt 文件。如果只需要以“M”开头的.txt文件,您可以进一步优化它:
all_files <- dir()
file_names <- grep(all_files,pattern = "^M.*.txt$",value = TRUE)
如果不是本地目录中的所有 .txt 文件都是输入文件,您将执行此操作,并且您可能需要进一步优化正则表达式模式,直到您只捕获所需的 txt 文件。
对于 .html 文件有点困难,因为 saveWidget()
目前不允许您使用相对文件路径保存,但是 solution 使用 withr::with_dir()
。这应该适用于第 7 部分:
withr::with_dir(new_directory_name, saveWidget(dtable, file="dtable.html"))
这应该适用于第 8 部分:
withr::with_dir(new_directory_name, saveWidget(Network, file="Network.html"))
7. RcisTarget 的最终输出是 data.table 并导出到 html
motifEnrichmentTable_wGenes_v1_wLogo <- addLogo(motifEnrichmentTable_wGenes_v1)
resultsSubset <- motifEnrichmentTable_wGenes_v1_wLogo[1:147,]
Type_v2 <- "Motifs_Table"
# Extract the file name without the extension, so the directory with the results
# will correspond to this name:
new_directory_name <- gsub(x = file_names[i],pattern = "(.*)\.txt","\1")
#Create the new directory:
dir.create(new_directory_name)
library(DT)
library(webshot)
library(htmltools)
library(xml2)
## export the data table to html file format###
dtable <- datatable(resultsSubset[,-c("enrichedGenes", "TF_lowConf"), with=FALSE],
escape = FALSE, # To show the logo
filter="top", options=list(pageLength=1000))
html_test <- paste0(new_directory_name,"/","dtable.html")
##To save files in the pdf format###
#saveWidget(dtable, html_test)
#webshot(html_test, paste0(new_directory_name,"/", "motif.pdf"))
8.构建网络并导出为 html 格式
# Extract the file name without the extension, so the directory with the results
# will correspond to this name:
new_directory_name <- gsub(x = file_names[i],pattern = "(.*)\.txt","\1")
#Create the new directory:
dir.create(new_directory_name)
signifMotifNames <- motifEnrichmentTable$motif[1:3]
incidenceMatrix <- getSignificantGenes(Module_10_1_geneLists,
motifRankings,
signifRankingNames=signifMotifNames,
plotCurve=TRUE, maxRank=5000,
genesFormat="incidMatrix",
method="aprox")$incidMatrix
library(reshape2)
edges <- melt(incidenceMatrix)
edges <- edges[which(edges[,3]==1),1:2]
colnames(edges) <- c("from","to")
library(visNetwork)
motifs <- unique(as.character(edges[,1]))
genes <- unique(as.character(edges[,2]))
nodes <- data.frame(id=c(motifs, genes),
label=c(motifs, genes),
title=c(motifs, genes), # tooltip
shape=c(rep("diamond", length(motifs)), rep("elypse", length(genes))),
color=c(rep("purple", length(motifs)), rep("skyblue", length(genes))))
Network <- visNetwork(nodes, edges) %>% visOptions(highlightNearest = TRUE,
nodesIdSelection = TRUE)
##Export the network##
#visSave(Network, file ="Network.html")
html_test_v1 <- paste0(new_directory_name,"/","Network.html")
#saveWidget(Network, html_test_v1)
#webshot(html_test_v1, paste0(new_directory_name,"/", "Network.pdf"))
我正在 运行使用 R 脚本来分析一些生物数据。下面提供了片段数据和脚本的示例。这个数据文件有很多列(但我想关注第 5 列 - 基因)。我有超过 100 个这样的数据文件(将所有文件中的第 5 基因列视为感兴趣的列)。目前,我 运行 在 R 中分别设置每个文件,这个过程很乏味。我想 运行 同时为所有数据文件创建一个 R 脚本,并根据文件名将所有数据保存在不同的文件夹中。是否可以在脚本中一次循环或迭代并分析所有数据文件。请协助我。
谢谢,
图菲克
格式化问题和修改后的数据框
已插入:要读取的文件名
M3.1_IPA.txt
M8.1_IPA.txt
M8.2_IPA.txt
M8.3_IPA.txt
1.加载基因集进行分析
Data_file <- read.delim(file = "./M3.1_IPA.txt", stringsAsFactors = FALSE, check.names = FALSE, row.names = NULL)
dput(head(Data_file, 5))
structure(list(row.names = c("M3.1_ALPP", "M3.1_ALS2CR14", "M3.1_ANKRD30B",
"M3.1_ARL16", "M3.1_BCYRN1"), X = 1:5, Module = c("M3.1", "M3.1",
"M3.1", "M3.1", "M3.1"), Title = c("Cell Cycle", "Cell Cycle",
"Cell Cycle", "Cell Cycle", "Cell Cycle"), Gene = c("ALPP", "ALS2CR14",
"ANKRD30B", "ARL16", "BCYRN1"), Probes = c("ILMN_1693789", "ILMN_1787314",
"ILMN_1730678", "ILMN_2188119", "ILMN_1678757"), Module_gene = c("M3.1_ALPP",
"M3.1_ALS2CR14", "M3.1_ANKRD30B", "M3.1_ARL16", "M3.1_BCYRN1"
)), row.names = c(NA, 5L), class = "data.frame")
Module_10_1 <- Data_file[,5]
Module_10_1_geneLists <- list(Module_10_1_geneListName=Module_10_1)
Select 要使用的主题数据库(即有机体和 TSS 周围的距离)
data(motifAnnotations_hgnc) motifRankings <- importRankings("./hg19-tss-centered-10kb-7species.mc9nr.feather")
计算浓缩度
motifs_AUC <- calcAUC(Module_10_1_geneLists, motifRankings, nCores=1) Type = "Enrichment" Module = "M10_1" auc <- getAUC(motifs_AUC) ##Export the plots## pdf(paste0(Type, "_", Module, "_AUC_Histogram_Plot.pdf"), height = 5, width = 10) hist(auc, main="Module_10_1", xlab="AUC histogram", breaks=100, col="#ff000050", border="darkred") nes3 <- (3*sd(auc)) + mean(auc) abline(v=nes3, col="red") dev.off()
Select 重要图案 and/or 注释到 TFs
motifEnrichmentTable <- addMotifAnnotation(motifs_AUC, nesThreshold=3, motifAnnot=motifAnnotations_hgnc) ##Export the Motif enrichment analysis results### write.csv(motifEnrichmentTable, file ="./motifEnrichmentTable.csv", row.names = F, sep = ",")
确定每个基序的最佳富集基因
motifEnrichmentTable_wGenes_v1 <- addSignificantGenes(motifEnrichmentTable, rankings=motifRankings, geneSets=Module_10_1_geneLists) ##Export the Motif enrichment analysis results## write.csv(motifEnrichmentTable_wGenes_v1, file ="./motifEnrichmentTable_wGenes_v1.csv", row.names = F, sep = ",")
几个主题的情节
geneSetName <- "Module_10_1_Genelist" selectedMotifs <- c("cisbp__M6275", sample(motifEnrichmentTable$motif, 2)) par(mfrow=c(2,2)) Type_v1 <- "Few_motifs" ##Export the plots pdf(paste0(Type_v1, "_", Module, "_Sig_Genes_Plot.pdf"), height = 5, width = 5) getSignificantGenes(Module_10_1_geneLists, motifRankings, signifRankingNames=selectedMotifs, plotCurve=TRUE, maxRank=5000, genesFormat="none", method="aprox") dev.off()
RcisTarget的最终输出是一个data.table并导出到html
motifEnrichmentTable_wGenes_v1_wLogo <- addLogo(motifEnrichmentTable_wGenes_v1) resultsSubset <- motifEnrichmentTable_wGenes_v1_wLogo[1:147,] Type_v2 <- "Motifs_Table" library(DT) ## export the data table to html file format### dtable <- datatable(resultsSubset[,-c("enrichedGenes", "TF_lowConf"), with=FALSE], escape = FALSE, # To show the logo filter="top", options=list(pageLength=100)) html_test <- "dtable.html" saveWidget(dtable, html_test)
构建网络并导出为 html 格式
signifMotifNames <- motifEnrichmentTable$motif[1:3] incidenceMatrix <- getSignificantGenes(Module_10_1_geneLists, motifRankings, signifRankingNames=signifMotifNames, plotCurve=TRUE, maxRank=5000, genesFormat="incidMatrix", method="aprox")$incidMatrix library(reshape2) edges <- melt(incidenceMatrix) edges <- edges[which(edges[,3]==1),1:2] colnames(edges) <- c("from","to") library(visNetwork) motifs <- unique(as.character(edges[,1])) genes <- unique(as.character(edges[,2])) nodes <- data.frame(id=c(motifs, genes), label=c(motifs, genes), title=c(motifs, genes), # tooltip shape=c(rep("diamond", length(motifs)), rep("elypse", length(genes))), color=c(rep("purple", length(motifs)), rep("skyblue", length(genes)))) Network <- visNetwork(nodes, edges) %>% visOptions(highlightNearest = TRUE, nodesIdSelection = TRUE) ##Export the network## visSave(Network, file ="Network.html")
我是根据给定的示例文件名编写的:
file_names <- c("M3.1_IPA.txt","M8.1_IPA.txt","M8.2_IPA.txt","M8.3_IPA.txt")
最简单的方法是迭代 1:length(file_names)
,并为每次迭代创建一组唯一的输出文件。根据您的问题,您想将结果保存到不同的文件夹中。这可以通过提取文件名(删除 .txt)然后使用该名称创建一个新目录来完成。然后您可以将此迭代的所有输出保存到新目录
然后会为下一次迭代创建一个新目录,因此您不会保存以前的结果。
for(i in 1:length(file_names)){
Data_file <- read.csv(file = paste0("./",file_names[i]), stringsAsFactors = FALSE, check.names = FALSE)
Module_10_1 <- Data_file[,1]
Module_10_1_geneLists <- list(Module_10_1_geneListName=Module_10_1)
data(motifAnnotations_hgnc)
motifRankings <- importRankings("./hg19-tss-centered-10kb-7species.mc9nr.feather")
motifs_AUC <- calcAUC(Module_10_1_geneLists, motifRankings, nCores=1)
Type = "Enrichment"
Module = "M10_1"
auc <- getAUC(motifs_AUC)
# Extract the file name without the extension, so the directory with the results
# will correspond to this name:
new_directory_name <- gsub(x = file_names[i],pattern = "(.*)\.txt","\1")
#Create the new directory:
dir.create(new_directory_name)
##Export the plots##
# Save to the new directory
pdf(paste0(new_directory_name,"/",Type,"_", Module,"_AUC_Histogram_Plot.pdf"), height = 5, width = 10)
hist(auc, main="Module_10_1", xlab="AUC histogram",
breaks=100, col="#ff000050", border="darkred")
nes3 <- (3*sd(auc)) + mean(auc)
abline(v=nes3, col="red")
dev.off()
}
我省略了你脚本的其余部分,因为保存 motifEnrichmentTable_wGenes_v1.csv
、_Sig_Genes_Plot.pdf
和其他输出的解决方案与上面 _AUC_Histogram_Plot.pdf
相同。您只需要使用 paste0(new_directory_name,"/",<insert-output-name>)
.
如果您有很多文件,您可以在本地目录中搜索与正确模式匹配的文件,而不是手动创建向量 file_names
。例如
all_files <- dir()
file_names <- grep(all_files,pattern = "*.txt$",value = TRUE)
将 return 本地目录中的所有 .txt 文件。如果只需要以“M”开头的.txt文件,您可以进一步优化它:
all_files <- dir()
file_names <- grep(all_files,pattern = "^M.*.txt$",value = TRUE)
如果不是本地目录中的所有 .txt 文件都是输入文件,您将执行此操作,并且您可能需要进一步优化正则表达式模式,直到您只捕获所需的 txt 文件。
对于 .html 文件有点困难,因为 saveWidget()
目前不允许您使用相对文件路径保存,但是 solution 使用 withr::with_dir()
。这应该适用于第 7 部分:
withr::with_dir(new_directory_name, saveWidget(dtable, file="dtable.html"))
这应该适用于第 8 部分:
withr::with_dir(new_directory_name, saveWidget(Network, file="Network.html"))
7. RcisTarget 的最终输出是 data.table 并导出到 html
motifEnrichmentTable_wGenes_v1_wLogo <- addLogo(motifEnrichmentTable_wGenes_v1)
resultsSubset <- motifEnrichmentTable_wGenes_v1_wLogo[1:147,]
Type_v2 <- "Motifs_Table"
# Extract the file name without the extension, so the directory with the results
# will correspond to this name:
new_directory_name <- gsub(x = file_names[i],pattern = "(.*)\.txt","\1")
#Create the new directory:
dir.create(new_directory_name)
library(DT)
library(webshot)
library(htmltools)
library(xml2)
## export the data table to html file format###
dtable <- datatable(resultsSubset[,-c("enrichedGenes", "TF_lowConf"), with=FALSE],
escape = FALSE, # To show the logo
filter="top", options=list(pageLength=1000))
html_test <- paste0(new_directory_name,"/","dtable.html")
##To save files in the pdf format###
#saveWidget(dtable, html_test)
#webshot(html_test, paste0(new_directory_name,"/", "motif.pdf"))
8.构建网络并导出为 html 格式
# Extract the file name without the extension, so the directory with the results
# will correspond to this name:
new_directory_name <- gsub(x = file_names[i],pattern = "(.*)\.txt","\1")
#Create the new directory:
dir.create(new_directory_name)
signifMotifNames <- motifEnrichmentTable$motif[1:3]
incidenceMatrix <- getSignificantGenes(Module_10_1_geneLists,
motifRankings,
signifRankingNames=signifMotifNames,
plotCurve=TRUE, maxRank=5000,
genesFormat="incidMatrix",
method="aprox")$incidMatrix
library(reshape2)
edges <- melt(incidenceMatrix)
edges <- edges[which(edges[,3]==1),1:2]
colnames(edges) <- c("from","to")
library(visNetwork)
motifs <- unique(as.character(edges[,1]))
genes <- unique(as.character(edges[,2]))
nodes <- data.frame(id=c(motifs, genes),
label=c(motifs, genes),
title=c(motifs, genes), # tooltip
shape=c(rep("diamond", length(motifs)), rep("elypse", length(genes))),
color=c(rep("purple", length(motifs)), rep("skyblue", length(genes))))
Network <- visNetwork(nodes, edges) %>% visOptions(highlightNearest = TRUE,
nodesIdSelection = TRUE)
##Export the network##
#visSave(Network, file ="Network.html")
html_test_v1 <- paste0(new_directory_name,"/","Network.html")
#saveWidget(Network, html_test_v1)
#webshot(html_test_v1, paste0(new_directory_name,"/", "Network.pdf"))