R:使用数据框中特定列的元素重命名文件
R: Renaming files using elements of a particular column in the dataframe
我有一个 .txt 文件的文件夹,每个文件都有一个很长的字符串名称,例如 "ctrl_Jack_DrugA_XXuM.txt"。
但是这个名字缺少一个重要的字符串,timestamps。
但是,我在每个文件的数据框中都有该信息。
例如,在每个文件中,包含多个列,其中一个列称为 "Pid_treatmentsum":
其中的元素是"Jack_R4_200514_DrugA_XXuM.txt"
所以在继续下游分析之前,我想根据名称(例如 Jack)和时间戳(例如 "R4_200514")将文件分类到子文件夹中;为此,我需要用 "Pid_treatmentsum" 列中的值来重命名文件。
现在代码:
```
# Build a minimal reproducible example (MRE): two tab-separated sample files
# written into the current working directory.

# ---- file 1: 20 identical rows for "Jack" ----
Row <- rep("16", times = 20)
column <- rep("3", times = 20)
Pid <- rep("Jack", times = 20)
Stimulation <- rep("3S", times = 20)
Drug <- rep("2DG", times = 20)
Dose <- rep("3uM", times = 20)
Treatmentsum <- rep(paste("Jack", "3S", "2DG", "3uM", sep = "_"), times = 20)
# Same identifier as Treatmentsum, with the run/timestamp inserted after the name
PiD_treatmentsum <- rep(
  paste("Jack", "T4_20200501", "3S", "2DG", "3uM", sep = "_"),
  times = 20
)
sampleset <- data.frame(
  Row, column, Pid, Stimulation, Drug, Dose, Treatmentsum, PiD_treatmentsum
)
write.table(
  sampleset,
  file = "ctrl_Jack_3S_2DG_3uM.txt",
  sep = "\t",
  row.names = FALSE,
  col.names = TRUE
)

# ---- file 2: 40 identical rows for "Mark" ----
Row <- rep("16", times = 40)
column <- rep("3", times = 40)
Pid <- rep("Mark", times = 40)
Stimulation <- rep("3S", times = 40)
Drug <- rep("STS", times = 40)
Dose <- rep("1uM", times = 40)
Treatmentsum <- rep(paste("Mark", "3S", "STS", "1uM", sep = "_"), times = 40)
PiD_treatmentsum <- rep(
  paste("Mark", "T5_20200501", "3S", "STS", "1uM", sep = "_"),
  times = 40
)
sampleset <- data.frame(
  Row, column, Pid, Stimulation, Drug, Dose, Treatmentsum, PiD_treatmentsum
)
write.table(
  sampleset,
  file = "ctrl_Mark_3S_STS_1uM.txt",
  sep = "\t",
  row.names = FALSE,
  col.names = TRUE
)
# rename all the files using their PiD_treatmentsum
# NOTE(review): list.files() is called without full.names = TRUE, so the
# returned names carry no directory part; read.csv(x) below then resolves them
# against the working directory - confirm the working directory is that folder.
filenames <- list.files("C:/UsersXXX", pattern="*.txt")
outdirectory <- "~/out"
lapply(filenames, function(x) {
# read the tab-separated file into a data frame
df <- read.csv(x,sep="\t", header=TRUE, fill = T,stringsAsFactors = F)
# distinct value(s) of the PiD_treatmentsum column, as character
a <- as.character(unique(df[["PiD_treatmentsum"]]))
# candidate new file name: "ctrl_" + identifier + ".txt"
b<-paste0("ctrl_",a, '.txt', sep="")
# file.rename() returns a logical (TRUE/FALSE), not the new file name, so
# `newname` does not hold a name that could be used to build a path
newname <- file.rename(basename(x), b)
# NOTE(review): the paste0( call opened on the next line is never closed; only
# one ")" follows, so the parser reaches "}" with write.table( still open and
# reports the "unexpected '}'" error
write.table(df, paste0(outdirectory,"/", newname, sep="\t",
quote=FALSE, row.names=F, col.names=TRUE)
})
这里报错:意外的 "}"(unexpected '}')。我想我一定是把循环搞砸了。
如果我把代码拆开,只对一个文件运行作为示例,代码是有效的:
# Walk through the same steps by hand for one file, outside of lapply()
df <- read.csv(
  file = 'ctrl_Jack_3S_2DG_3uM.txt',
  sep = "\t",
  header = TRUE,
  fill = TRUE,
  stringsAsFactors = FALSE
)
# Collapse the identifier column to its distinct value(s)
a <- as.character(unique(df[["PiD_treatmentsum"]]))
# New file name: "ctrl_" prefix + identifier + ".txt"
b <- paste("ctrl_", a, '.txt', sep = "")
# Show the bare file name (no directory part)
basename('ctrl_Jack_3S_2DG_3uM.txt')
# Rename the file; the call yields TRUE on success, FALSE otherwise
file.rename(basename('ctrl_Jack_3S_2DG_3uM.txt'), b)
```
一点帮助和解释将不胜感激:)
这应该有效:
# Create a minimal reproducible example (MRE): two tab-separated sample files
# written into the current working directory. Every column holds one constant
# value repeated for each row; PiD_treatmentsum is the identifier that the
# files are later renamed after.

# file 1: 20 rows for "Jack"
Row <- rep("16", 20)
column <- rep("3", 20)
Pid <- rep("Jack", 20)
Stimulation <- rep("3S", 20)
Drug <- rep("2DG", 20)
Dose <- rep("3uM", 20)
Treatmentsum <- rep(paste("Jack", "3S", "2DG", "3uM", sep = "_"), 20)
PiD_treatmentsum <- rep(
  paste("Jack", "T4_20200501", "3S", "2DG", "3uM", sep = "_"), 20
)
sampleset <- data.frame(
  Row, column, Pid, Stimulation, Drug, Dose, Treatmentsum, PiD_treatmentsum
)
write.table(sampleset, file = "ctrl_Jack_3S_2DG_3uM.txt", sep = "\t",
            row.names = FALSE, col.names = TRUE)

# file 2: 40 rows for "Mark"
Row <- rep("16", 40)
column <- rep("3", 40)
Pid <- rep("Mark", 40)
Stimulation <- rep("3S", 40)
Drug <- rep("STS", 40)
Dose <- rep("1uM", 40)
Treatmentsum <- rep(paste("Mark", "3S", "STS", "1uM", sep = "_"), 40)
PiD_treatmentsum <- rep(
  paste("Mark", "T5_20200501", "3S", "STS", "1uM", sep = "_"), 40
)
sampleset <- data.frame(
  Row, column, Pid, Stimulation, Drug, Dose, Treatmentsum, PiD_treatmentsum
)
write.table(sampleset, file = "ctrl_Mark_3S_STS_1uM.txt", sep = "\t",
            row.names = FALSE, col.names = TRUE)
我只改了最后三行。我们使用 `file.rename` 重命名文件(`newname` 现在是 `TRUE`;如果重命名时出错,则是 `FALSE`)。然后我们创建 `outdirectory`(如果该目录已经存在,`dir.create` 会发出警告,但不会覆盖任何内容。我们也可以先测试 `outdirectory` 是否已经存在,如果存在则省略 `dir.create`)。最后我们使用 `file.copy` 将重命名后的文件复制到 `outdirectory`。我们可以使用 `file.path` 连接目录和文件名。
# Rename every matching file in the working directory after the value stored
# in its PiD_treatmentsum column, then copy the renamed file to outdirectory.
#
# The pattern is a regular expression: the dot is escaped with a doubled
# backslash (a single "\." is not a valid escape inside an R string literal)
# and "$" anchors the match at the end of the file name.
filenames <- list.files(".", pattern = "M\\.txt$")
outdirectory <- "~/out"

# Create the target directory once, up front, instead of once per file
# (calling dir.create() on an existing directory only produces a warning).
if (!dir.exists(outdirectory)) {
  dir.create(outdirectory, recursive = TRUE)
}

# Returns a list with one logical per input file: TRUE when the file was
# renamed and copied, FALSE when the rename or the copy failed.
lapply(filenames, function(x) {
  df <- read.csv(x, sep = "\t", header = TRUE, fill = TRUE,
                 stringsAsFactors = FALSE)
  a <- unique(as.character(df[["PiD_treatmentsum"]]))
  # A single file name can only be built from a single identifier; fail with
  # a clear message instead of an obscure length mismatch in file.rename().
  if (length(a) != 1L) {
    stop("Expected exactly one distinct PiD_treatmentsum value in '", x,
         "', found ", length(a), call. = FALSE)
  }
  b <- paste0("ctrl_", a, ".txt")
  # file.rename() reports success as TRUE/FALSE; do not try to copy a file
  # that could not be renamed.
  if (!file.rename(x, b)) {
    warning("Could not rename '", x, "' to '", b, "'", call. = FALSE)
    return(FALSE)
  }
  file.copy(b, file.path(outdirectory, b))
})
我建议将变量名更新为更有意义的名称,以使未来的重构更容易;)
我有一个 .txt 文件的文件夹,每个文件都有一个很长的字符串名称,例如 "ctrl_Jack_DrugA_XXuM.txt"。 但是这个名字缺少一个重要的字符串,timestamps。
但是,我在每个文件的数据框中都有该信息。 例如,在每个文件中,包含多个列,其中一个列称为 "Pid_treatmentsum": 其中的元素是"Jack_R4_200514_DrugA_XXuM.txt"
所以在继续下游分析之前,我想根据名称(例如 Jack)和时间戳(例如 "R4_200514")将文件分类到子文件夹中;为此,我需要用 "Pid_treatmentsum" 列中的值来重命名文件。
现在代码:
```
# Build a minimal reproducible example (MRE): two tab-separated sample files
# written into the current working directory.

# ---- file 1: 20 identical rows for "Jack" ----
Row <- rep("16", times = 20)
column <- rep("3", times = 20)
Pid <- rep("Jack", times = 20)
Stimulation <- rep("3S", times = 20)
Drug <- rep("2DG", times = 20)
Dose <- rep("3uM", times = 20)
Treatmentsum <- rep(paste("Jack", "3S", "2DG", "3uM", sep = "_"), times = 20)
# Same identifier as Treatmentsum, with the run/timestamp inserted after the name
PiD_treatmentsum <- rep(
  paste("Jack", "T4_20200501", "3S", "2DG", "3uM", sep = "_"),
  times = 20
)
sampleset <- data.frame(
  Row, column, Pid, Stimulation, Drug, Dose, Treatmentsum, PiD_treatmentsum
)
write.table(
  sampleset,
  file = "ctrl_Jack_3S_2DG_3uM.txt",
  sep = "\t",
  row.names = FALSE,
  col.names = TRUE
)

# ---- file 2: 40 identical rows for "Mark" ----
Row <- rep("16", times = 40)
column <- rep("3", times = 40)
Pid <- rep("Mark", times = 40)
Stimulation <- rep("3S", times = 40)
Drug <- rep("STS", times = 40)
Dose <- rep("1uM", times = 40)
Treatmentsum <- rep(paste("Mark", "3S", "STS", "1uM", sep = "_"), times = 40)
PiD_treatmentsum <- rep(
  paste("Mark", "T5_20200501", "3S", "STS", "1uM", sep = "_"),
  times = 40
)
sampleset <- data.frame(
  Row, column, Pid, Stimulation, Drug, Dose, Treatmentsum, PiD_treatmentsum
)
write.table(
  sampleset,
  file = "ctrl_Mark_3S_STS_1uM.txt",
  sep = "\t",
  row.names = FALSE,
  col.names = TRUE
)
# rename all the files using their PiD_treatmentsum
# NOTE(review): list.files() is called without full.names = TRUE, so the
# returned names carry no directory part; read.csv(x) below then resolves them
# against the working directory - confirm the working directory is that folder.
filenames <- list.files("C:/UsersXXX", pattern="*.txt")
outdirectory <- "~/out"
lapply(filenames, function(x) {
# read the tab-separated file into a data frame
df <- read.csv(x,sep="\t", header=TRUE, fill = T,stringsAsFactors = F)
# distinct value(s) of the PiD_treatmentsum column, as character
a <- as.character(unique(df[["PiD_treatmentsum"]]))
# candidate new file name: "ctrl_" + identifier + ".txt"
b<-paste0("ctrl_",a, '.txt', sep="")
# file.rename() returns a logical (TRUE/FALSE), not the new file name, so
# `newname` does not hold a name that could be used to build a path
newname <- file.rename(basename(x), b)
# NOTE(review): the paste0( call opened on the next line is never closed; only
# one ")" follows, so the parser reaches "}" with write.table( still open and
# reports the "unexpected '}'" error
write.table(df, paste0(outdirectory,"/", newname, sep="\t",
quote=FALSE, row.names=F, col.names=TRUE)
})
这里报错:意外的 "}"(unexpected '}')。我想我一定是把循环搞砸了。
如果我把代码拆开,只对一个文件运行作为示例,代码是有效的:
# Walk through the same steps by hand for one file, outside of lapply()
df <- read.csv(
  file = 'ctrl_Jack_3S_2DG_3uM.txt',
  sep = "\t",
  header = TRUE,
  fill = TRUE,
  stringsAsFactors = FALSE
)
# Collapse the identifier column to its distinct value(s)
a <- as.character(unique(df[["PiD_treatmentsum"]]))
# New file name: "ctrl_" prefix + identifier + ".txt"
b <- paste("ctrl_", a, '.txt', sep = "")
# Show the bare file name (no directory part)
basename('ctrl_Jack_3S_2DG_3uM.txt')
# Rename the file; the call yields TRUE on success, FALSE otherwise
file.rename(basename('ctrl_Jack_3S_2DG_3uM.txt'), b)
```
一点帮助和解释将不胜感激:)
这应该有效:
# Create a minimal reproducible example (MRE): two tab-separated sample files
# written into the current working directory. Every column holds one constant
# value repeated for each row; PiD_treatmentsum is the identifier that the
# files are later renamed after.

# file 1: 20 rows for "Jack"
Row <- rep("16", 20)
column <- rep("3", 20)
Pid <- rep("Jack", 20)
Stimulation <- rep("3S", 20)
Drug <- rep("2DG", 20)
Dose <- rep("3uM", 20)
Treatmentsum <- rep(paste("Jack", "3S", "2DG", "3uM", sep = "_"), 20)
PiD_treatmentsum <- rep(
  paste("Jack", "T4_20200501", "3S", "2DG", "3uM", sep = "_"), 20
)
sampleset <- data.frame(
  Row, column, Pid, Stimulation, Drug, Dose, Treatmentsum, PiD_treatmentsum
)
write.table(sampleset, file = "ctrl_Jack_3S_2DG_3uM.txt", sep = "\t",
            row.names = FALSE, col.names = TRUE)

# file 2: 40 rows for "Mark"
Row <- rep("16", 40)
column <- rep("3", 40)
Pid <- rep("Mark", 40)
Stimulation <- rep("3S", 40)
Drug <- rep("STS", 40)
Dose <- rep("1uM", 40)
Treatmentsum <- rep(paste("Mark", "3S", "STS", "1uM", sep = "_"), 40)
PiD_treatmentsum <- rep(
  paste("Mark", "T5_20200501", "3S", "STS", "1uM", sep = "_"), 40
)
sampleset <- data.frame(
  Row, column, Pid, Stimulation, Drug, Dose, Treatmentsum, PiD_treatmentsum
)
write.table(sampleset, file = "ctrl_Mark_3S_STS_1uM.txt", sep = "\t",
            row.names = FALSE, col.names = TRUE)
我只改了最后三行。我们使用 `file.rename` 重命名文件(`newname` 现在是 `TRUE`;如果重命名时出错,则是 `FALSE`)。然后我们创建 `outdirectory`(如果该目录已经存在,`dir.create` 会发出警告,但不会覆盖任何内容。我们也可以先测试 `outdirectory` 是否已经存在,如果存在则省略 `dir.create`)。最后我们使用 `file.copy` 将重命名后的文件复制到 `outdirectory`。我们可以使用 `file.path` 连接目录和文件名。
# Rename every matching file in the working directory after the value stored
# in its PiD_treatmentsum column, then copy the renamed file to outdirectory.
#
# The pattern is a regular expression: the dot is escaped with a doubled
# backslash (a single "\." is not a valid escape inside an R string literal)
# and "$" anchors the match at the end of the file name.
filenames <- list.files(".", pattern = "M\\.txt$")
outdirectory <- "~/out"

# Create the target directory once, up front, instead of once per file
# (calling dir.create() on an existing directory only produces a warning).
if (!dir.exists(outdirectory)) {
  dir.create(outdirectory, recursive = TRUE)
}

# Returns a list with one logical per input file: TRUE when the file was
# renamed and copied, FALSE when the rename or the copy failed.
lapply(filenames, function(x) {
  df <- read.csv(x, sep = "\t", header = TRUE, fill = TRUE,
                 stringsAsFactors = FALSE)
  a <- unique(as.character(df[["PiD_treatmentsum"]]))
  # A single file name can only be built from a single identifier; fail with
  # a clear message instead of an obscure length mismatch in file.rename().
  if (length(a) != 1L) {
    stop("Expected exactly one distinct PiD_treatmentsum value in '", x,
         "', found ", length(a), call. = FALSE)
  }
  b <- paste0("ctrl_", a, ".txt")
  # file.rename() reports success as TRUE/FALSE; do not try to copy a file
  # that could not be renamed.
  if (!file.rename(x, b)) {
    warning("Could not rename '", x, "' to '", b, "'", call. = FALSE)
    return(FALSE)
  }
  file.copy(b, file.path(outdirectory, b))
})
我建议将变量名更新为更有意义的名称,以使未来的重构更容易;)