将嵌套 for 循环改写为矢量化函数,并把值保存到数据框中
Nested for loop to vectorized function and saving value in dataframe
我有一个非常大的输出,需要把它放进一张表中。这张表应该有 3380 行、250 列,形如下面示例中的 input_template_df_0。
然后我还有另一张表 input_lofreq_4,它把这些输出存成长格式:行名和列名各占一列,如下面的示例所示。
我知道如何用嵌套 for 循环来完成这件事,但正如您所想象的那样,这需要很长时间(!)。我(还)不太擅长 apply 系列函数和矢量化函数,所以希望这个社区里有人能帮我把这段代码改写成更快的版本?
PS:循环里还有一个 if 判断,因为并不是所有的(行、列)组合都能在 input_lofreq_4 中找到。
# Fill each cell of input_template_df_0 with the VF value recorded in
# input_lofreq_4 for the same (row name, column name) pair.
# Cells whose pair does not occur in input_lofreq_4 are left untouched.
row_ids <- rownames(input_template_df_0)
col_ids <- colnames(input_template_df_0)
for (i in seq_along(row_ids)) {
  for (j in seq_along(col_ids)) {
    # Look the pair up once and reuse the result for both the emptiness
    # check and the assignment, so both always refer to the same object.
    hit <- input_lofreq_4$selectrow == row_ids[i] &
      input_lofreq_4$selectcolumn == col_ids[j]
    vf <- as.numeric(as.character(input_lofreq_4[hit, "VF"]))
    # Not every combination is present in input_lofreq_4; skip those.
    if (length(vf) > 0) {
      input_template_df_0[i, j] <- vf
    }
  }
}
谢谢!
编辑:我制作了简化的数据框
# Simplified reproducible example: a 7 x 3 template plus its long-format
# source table.

# Empty template: 7 labelled rows, 3 labelled columns, every cell NA.
input_template_df_0 <- as.data.frame(matrix(NA, nrow = 7, ncol = 3))
rownames(input_template_df_0) <- c(
  "5000X-WT50-MUT50_ERR5059072",
  "5000X-WT50-MUT50_ERR5059238",
  "5000X-WT50-MUT50_ERR5059260",
  "5000X-WT50-MUT50_ERR5059092",
  "5000X-WT50-MUT50_ERR5059204",
  "5000X-WT50-MUT50_ERR5059123",
  "5000X-WT50-MUT50_ERR5059282"
)
colnames(input_template_df_0) <- c(
  "C913T_ERR5059114",
  "C913T_ERR5059253",
  "C913T_ERR5059286"
)

# Long table: the 7 row labels cycle once per column label, and each column
# label is held for 7 consecutive records (21 records in total).
selectrow <- rep(rownames(input_template_df_0), times = 3)
selectcolumn <- rep(colnames(input_template_df_0), each = 7)
# VF values are deliberately kept as strings, exactly as in the question.
VF <- c(
  "0.51", "0.48", "0.52", "0.49", "0.5", "0.5", "0.48",
  "0.52", "0.51", "0.52", "0.5", "0.5", "0.48", "0.5",
  "0.5", "0.48", "0.49", "0.51", "0.5", "0.48", "0.52"
)
input_lofreq_4 <- as.data.frame(cbind(selectrow, selectcolumn, VF))

input_template_df_0
input_lofreq_4
你可以利用 lapply:
# Fill the template one column at a time from the long table input_lofreq_4.
rn <- rownames(input_template_df_0)
input_template_df_0[] <- lapply(names(input_template_df_0), function(col_name) {
  # Records of the long table that belong to this template column.
  col_hits <- input_lofreq_4[
    input_lofreq_4$selectcolumn == col_name, ,
    drop = FALSE
  ]
  # Align by row name with match() instead of comparing `rn == selectrow`
  # element-wise: that comparison silently recycles `rn` and is only right
  # when the long table is complete and sorted in template row order.
  # Row names with no record for this column yield NA.
  col_hits$VF[match(rn, col_hits$selectrow)]
})
input_template_df_0
# C913T_ERR5059114 C913T_ERR5059253 C913T_ERR5059286
#5000X-WT50-MUT50_ERR5059072 0.51 0.52 0.5
#5000X-WT50-MUT50_ERR5059238 0.48 0.51 0.48
#5000X-WT50-MUT50_ERR5059260 0.52 0.52 0.49
#5000X-WT50-MUT50_ERR5059092 0.49 0.5 0.51
#5000X-WT50-MUT50_ERR5059204 0.5 0.5 0.5
#5000X-WT50-MUT50_ERR5059123 0.5 0.48 0.48
#5000X-WT50-MUT50_ERR5059282 0.48 0.5 0.52
如果数据框中缺少某些(行、列)组合对应的值,可以使用下面的写法(缺失的单元格会得到 NA):
# Vectorised fill: build one composite key per record of the long table and
# one per template cell, then look every cell up with a single match().
# The row labels themselves contain "-", so "-" is not a safe separator for
# the composite key; the ASCII unit separator keeps the two parts unambiguous.
key_sep <- "\037"
tmp <- paste(
  input_lofreq_4$selectrow, input_lofreq_4$selectcolumn,
  sep = key_sep
)
# outer() yields a rows x columns matrix of keys in column-major order, which
# is the order in which `df[] <- vector` fills the data frame.
cell_keys <- outer(
  rownames(input_template_df_0), colnames(input_template_df_0),
  paste,
  sep = key_sep
)
# Cells whose key is absent from the long table become NA.
input_template_df_0[] <- input_lofreq_4$VF[match(cell_keys, tmp)]
我有一个非常大的输出,需要把它放进一张表中。这张表应该有 3380 行、250 列,形如下面示例中的 input_template_df_0。
然后我还有另一张表 input_lofreq_4,它把这些输出存成长格式:行名和列名各占一列,如下面的示例所示。
我知道如何用嵌套 for 循环来完成这件事,但正如您所想象的那样,这需要很长时间(!)。我(还)不太擅长 apply 系列函数和矢量化函数,所以希望这个社区里有人能帮我把这段代码改写成更快的版本?
PS:还有一个if函数,因为并不是所有的组合都会在input_lofreq_4
中找到
# Fill each cell of input_template_df_0 with the VF value recorded in
# input_lofreq_4 for the same (row name, column name) pair.
# Cells whose pair does not occur in input_lofreq_4 are left untouched.
row_ids <- rownames(input_template_df_0)
col_ids <- colnames(input_template_df_0)
for (i in seq_along(row_ids)) {
  for (j in seq_along(col_ids)) {
    # Look the pair up once and reuse the result for both the emptiness
    # check and the assignment, so both always refer to the same object.
    hit <- input_lofreq_4$selectrow == row_ids[i] &
      input_lofreq_4$selectcolumn == col_ids[j]
    vf <- as.numeric(as.character(input_lofreq_4[hit, "VF"]))
    # Not every combination is present in input_lofreq_4; skip those.
    if (length(vf) > 0) {
      input_template_df_0[i, j] <- vf
    }
  }
}
谢谢!
编辑:我制作了简化的数据框
# Simplified reproducible example: a 7 x 3 template plus its long-format
# source table.

# Empty template: 7 labelled rows, 3 labelled columns, every cell NA.
input_template_df_0 <- as.data.frame(matrix(NA, nrow = 7, ncol = 3))
rownames(input_template_df_0) <- c(
  "5000X-WT50-MUT50_ERR5059072",
  "5000X-WT50-MUT50_ERR5059238",
  "5000X-WT50-MUT50_ERR5059260",
  "5000X-WT50-MUT50_ERR5059092",
  "5000X-WT50-MUT50_ERR5059204",
  "5000X-WT50-MUT50_ERR5059123",
  "5000X-WT50-MUT50_ERR5059282"
)
colnames(input_template_df_0) <- c(
  "C913T_ERR5059114",
  "C913T_ERR5059253",
  "C913T_ERR5059286"
)

# Long table: the 7 row labels cycle once per column label, and each column
# label is held for 7 consecutive records (21 records in total).
selectrow <- rep(rownames(input_template_df_0), times = 3)
selectcolumn <- rep(colnames(input_template_df_0), each = 7)
# VF values are deliberately kept as strings, exactly as in the question.
VF <- c(
  "0.51", "0.48", "0.52", "0.49", "0.5", "0.5", "0.48",
  "0.52", "0.51", "0.52", "0.5", "0.5", "0.48", "0.5",
  "0.5", "0.48", "0.49", "0.51", "0.5", "0.48", "0.52"
)
input_lofreq_4 <- as.data.frame(cbind(selectrow, selectcolumn, VF))

input_template_df_0
input_lofreq_4
你可以利用 lapply:
# Fill the template one column at a time from the long table input_lofreq_4.
rn <- rownames(input_template_df_0)
input_template_df_0[] <- lapply(names(input_template_df_0), function(col_name) {
  # Records of the long table that belong to this template column.
  col_hits <- input_lofreq_4[
    input_lofreq_4$selectcolumn == col_name, ,
    drop = FALSE
  ]
  # Align by row name with match() instead of comparing `rn == selectrow`
  # element-wise: that comparison silently recycles `rn` and is only right
  # when the long table is complete and sorted in template row order.
  # Row names with no record for this column yield NA.
  col_hits$VF[match(rn, col_hits$selectrow)]
})
input_template_df_0
# C913T_ERR5059114 C913T_ERR5059253 C913T_ERR5059286
#5000X-WT50-MUT50_ERR5059072 0.51 0.52 0.5
#5000X-WT50-MUT50_ERR5059238 0.48 0.51 0.48
#5000X-WT50-MUT50_ERR5059260 0.52 0.52 0.49
#5000X-WT50-MUT50_ERR5059092 0.49 0.5 0.51
#5000X-WT50-MUT50_ERR5059204 0.5 0.5 0.5
#5000X-WT50-MUT50_ERR5059123 0.5 0.48 0.48
#5000X-WT50-MUT50_ERR5059282 0.48 0.5 0.52
如果数据框中缺少某些(行、列)组合对应的值,可以使用下面的写法(缺失的单元格会得到 NA):
# Vectorised fill: build one composite key per record of the long table and
# one per template cell, then look every cell up with a single match().
# The row labels themselves contain "-", so "-" is not a safe separator for
# the composite key; the ASCII unit separator keeps the two parts unambiguous.
key_sep <- "\037"
tmp <- paste(
  input_lofreq_4$selectrow, input_lofreq_4$selectcolumn,
  sep = key_sep
)
# outer() yields a rows x columns matrix of keys in column-major order, which
# is the order in which `df[] <- vector` fills the data frame.
cell_keys <- outer(
  rownames(input_template_df_0), colnames(input_template_df_0),
  paste,
  sep = key_sep
)
# Cells whose key is absent from the long table become NA.
input_template_df_0[] <- input_lofreq_4$VF[match(cell_keys, tmp)]