使用 for 循环或 lapply 从 ttest 附加到 r 中的数据帧行
using for loop or lapply to append to rows of data frame in r from the ttest
好的,我上网查了一下,我还在创建一个复制线的数据框。
我有一个用于创建 welch t 检验结果的 for 循环,我保存了这样的值:
gene <- biomarkers$Symbol
pval <- ttest$p.value
tstat <- ttest$statistic
我尝试使用 for 循环进行迭代,以将结果添加到在块开始时创建的数据框中
df2 <- data.frame(pathol=(character()),
genes=character(),
p_value=character(),
t_stat=character(),
stringsAsFactors=FALSE)
for (gene in biomarkers$Symbol) {
print(gene)
ddat <- degs[degs$Symbol== gene & degs$pathol=="mitosis",]
ttest <- t.test(logFC ~ value, data = ddat)
print (ttest)
df2[nrow(df2) + 1,] #(this added the 10 genes only to the rows, not to the column)
然后我意识到我需要使用 lapply...我试过这个:
prova <- lapply(biomarkers$Symbol, function(gene) {
append = (gene)
#ttest <- t.test(logFC ~ value, data = ddat)
})
do.call(rbind, prova)
这创建了一个包含十个基因的列表,但是当我取消注释变量 'ttest' 时,它只是添加了一个列表:
[1,] "logFC by value"
[2,] "logFC by value"
[3,] "logFC by value"
[4,] "logFC by value"
[5,] "logFC by value"
[6,] "logFC by value"
[7,] "logFC by value"
[8,] "logFC by value"
[9,] "logFC by value"
[10,] "logFC by value"
我想得到一个如下所示的数据框:
pathol
genes
p_value
t_stat
mitosis
PBK
0.05
000.4
mitosis
PLK4
0.02
000.9
#所有行的基因都相同。
任何帮助都会很棒!
编辑
dput输出:
dput(head(ddat))
structure(list(experiment = c("FP001RO_15_HI", "FP001RO_15_HI",
"FP001RO_15_HI", "FP001RO_15_LOW", "FP001RO_15_LOW", "FP001RO_15_LOW"
), Human.Gene.entrezID = c("57405", "57405", "57405", "57405",
"57405", "57405"), Symbol = c("SPC25", "SPC25", "SPC25", "SPC25",
"SPC25", "SPC25"), description = c("SPC25 component of NDC80 kinetochore complex",
"SPC25 component of NDC80 kinetochore complex", "SPC25 component of NDC80 kinetochore complex",
"SPC25 component of NDC80 kinetochore complex", "SPC25 component of NDC80 kinetochore complex",
"SPC25 component of NDC80 kinetochore complex"), score = c(3.867,
3.867, 3.867, 3.867, 3.867, 3.867), type = c("WGGNC", "WGGNC",
"WGGNC", "WGGNC", "WGGNC", "WGGNC"), pathol = c("mitosis", "mitosis",
"mitosis", "mitosis", "mitosis", "mitosis"), Probeid = c("295661_at",
"295661_at", "295661_at", "295661_at", "295661_at", "295661_at"
), logFC = c(-0.0641349806730976, -0.0641349806730976, -0.0641349806730976,
-0.0324566291924587, -0.0324566291924587, -0.0324566291924587
), AveExpr = c(4.1541958195567, 4.1541958195567, 4.1541958195567,
4.17003499529702, 4.17003499529702, 4.17003499529702), t = c(-0.567682269120301,
-0.567682269120301, -0.567682269120301, -0.214562465957216, -0.214562465957216,
-0.214562465957216), P.Value = c(0.580865708246137, 0.580865708246137,
0.580865708246137, 0.833648126277364, 0.833648126277364, 0.833648126277364
), adj.P.Val = c(0.828914361133465, 0.828914361133465, 0.828914361133465,
0.999594589241814, 0.999594589241814, 0.999594589241814), B = c(-6.4683360535952,
-6.4683360535952, -6.4683360535952, -5.45944975240508, -5.45944975240508,
-5.45944975240508), cpd = c("FP001RO", "FP001RO", "FP001RO",
"FP001RO", "FP001RO", "FP001RO"), time = c(15, 15, 15, 15, 15,
15), dose = c("HI", "HI", "HI", "LOW", "LOW", "LOW"), entrezgene_rat = c(295661,
295661, 295661, 295661, 295661, 295661), external_gene_name_rat = c("Spc25",
"Spc25", "Spc25", "Spc25", "Spc25", "Spc25"), external_gene_name_human = c("SPC25",
"SPC25", "SPC25", "SPC25", "SPC25", "SPC25"), entrezGene_probes_human = c("57405_at",
"57405_at", "57405_at", "57405_at", "57405_at", "57405_at"),
inMap_human_withGrey = c(1, 1, 1, 1, 1, 1), inMap_rat_withGrey = c(1,
1, 1, 1, 1, 1), variable = structure(c(11L, 12L, 10L, 10L,
11L, 12L), .Label = c("Necrosis1", "Necrosis2", "Necrosis3",
"hyperpl1", "hyperpl2", "hyperpl3", "fibrosis", "hypertrophy1",
"hypertrophy2", "mitosis1", "mitosis2", "mitosis3", "vacuolation1",
"vacuolation2", "vacuolation3"), class = "factor"), value = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = c("0", "1"), class = "factor"),
pathology = c("mitosis", "mitosis", "mitosis", "mitosis",
"mitosis", "mitosis")), row.names = c(13L, 14L, 15L, 55L,
56L, 57L), class = "data.frame")
基因输入
dput(biomarkers$Symbol)
c("PBK", "PLK4", "CDKN3", "CDCA3", "PRC1", "CDK1", "TCF19", "SHCBP1",
"CENPK", "SPC25")
我们可以使用
prova <- lapply(biomarkers$Symbol, function(gene) {
ddat <- subset(degs, Symbol == gene & pathol =="mitosis")
fmla <- reformulate('value', response = 'logFC')
if(nlevels(droplevels(ddat$val)) >=2) {
ttest <- t.test(fmla, data = ddat)
data.frame(pathol = 'mitosis', genes = gene,
p_value = ttest$p.value, t_stat = ttest$statistic)
} else NULL
})
names(prova) <- biomarkers$Symbol
out <- do.call(rbind, prova)
补充你的问题,一次做很多检验的时候记得进行多重假设检验校正。
根据 akrun 的回答,这里是 BH 更正:
ord = order(prova$p.value)
prova$fdr = prova$p.value*nrow(prova)/ord
这将减少误报的数量。
好的,我上网查了一下,我还在创建一个复制线的数据框。 我有一个用于创建 welch t 检验结果的 for 循环,我保存了这样的值:
gene <- biomarkers$Symbol
pval <- ttest$p.value
tstat <- ttest$statistic
我尝试使用 for 循环进行迭代,以将结果添加到在块开始时创建的数据框中
df2 <- data.frame(pathol=(character()),
genes=character(),
p_value=character(),
t_stat=character(),
stringsAsFactors=FALSE)
for (gene in biomarkers$Symbol) {
print(gene)
ddat <- degs[degs$Symbol== gene & degs$pathol=="mitosis",]
ttest <- t.test(logFC ~ value, data = ddat)
print (ttest)
df2[nrow(df2) + 1,] #(this added the 10 genes only to the rows, not to the column)
然后我意识到我需要使用 lapply...我试过这个:
prova <- lapply(biomarkers$Symbol, function(gene) {
append = (gene)
#ttest <- t.test(logFC ~ value, data = ddat)
})
do.call(rbind, prova)
这创建了一个包含十个基因的列表,但是当我取消注释变量 'ttest' 时,它只是添加了一个列表:
[1,] "logFC by value"
[2,] "logFC by value"
[3,] "logFC by value"
[4,] "logFC by value"
[5,] "logFC by value"
[6,] "logFC by value"
[7,] "logFC by value"
[8,] "logFC by value"
[9,] "logFC by value"
[10,] "logFC by value"
我想得到一个如下所示的数据框:
pathol | genes | p_value | t_stat |
---|---|---|---|
mitosis | PBK | 0.05 | 000.4 |
mitosis | PLK4 | 0.02 | 000.9 |
#所有行的基因都相同。 任何帮助都会很棒!
编辑
dput输出:
dput(head(ddat))
structure(list(experiment = c("FP001RO_15_HI", "FP001RO_15_HI",
"FP001RO_15_HI", "FP001RO_15_LOW", "FP001RO_15_LOW", "FP001RO_15_LOW"
), Human.Gene.entrezID = c("57405", "57405", "57405", "57405",
"57405", "57405"), Symbol = c("SPC25", "SPC25", "SPC25", "SPC25",
"SPC25", "SPC25"), description = c("SPC25 component of NDC80 kinetochore complex",
"SPC25 component of NDC80 kinetochore complex", "SPC25 component of NDC80 kinetochore complex",
"SPC25 component of NDC80 kinetochore complex", "SPC25 component of NDC80 kinetochore complex",
"SPC25 component of NDC80 kinetochore complex"), score = c(3.867,
3.867, 3.867, 3.867, 3.867, 3.867), type = c("WGGNC", "WGGNC",
"WGGNC", "WGGNC", "WGGNC", "WGGNC"), pathol = c("mitosis", "mitosis",
"mitosis", "mitosis", "mitosis", "mitosis"), Probeid = c("295661_at",
"295661_at", "295661_at", "295661_at", "295661_at", "295661_at"
), logFC = c(-0.0641349806730976, -0.0641349806730976, -0.0641349806730976,
-0.0324566291924587, -0.0324566291924587, -0.0324566291924587
), AveExpr = c(4.1541958195567, 4.1541958195567, 4.1541958195567,
4.17003499529702, 4.17003499529702, 4.17003499529702), t = c(-0.567682269120301,
-0.567682269120301, -0.567682269120301, -0.214562465957216, -0.214562465957216,
-0.214562465957216), P.Value = c(0.580865708246137, 0.580865708246137,
0.580865708246137, 0.833648126277364, 0.833648126277364, 0.833648126277364
), adj.P.Val = c(0.828914361133465, 0.828914361133465, 0.828914361133465,
0.999594589241814, 0.999594589241814, 0.999594589241814), B = c(-6.4683360535952,
-6.4683360535952, -6.4683360535952, -5.45944975240508, -5.45944975240508,
-5.45944975240508), cpd = c("FP001RO", "FP001RO", "FP001RO",
"FP001RO", "FP001RO", "FP001RO"), time = c(15, 15, 15, 15, 15,
15), dose = c("HI", "HI", "HI", "LOW", "LOW", "LOW"), entrezgene_rat = c(295661,
295661, 295661, 295661, 295661, 295661), external_gene_name_rat = c("Spc25",
"Spc25", "Spc25", "Spc25", "Spc25", "Spc25"), external_gene_name_human = c("SPC25",
"SPC25", "SPC25", "SPC25", "SPC25", "SPC25"), entrezGene_probes_human = c("57405_at",
"57405_at", "57405_at", "57405_at", "57405_at", "57405_at"),
inMap_human_withGrey = c(1, 1, 1, 1, 1, 1), inMap_rat_withGrey = c(1,
1, 1, 1, 1, 1), variable = structure(c(11L, 12L, 10L, 10L,
11L, 12L), .Label = c("Necrosis1", "Necrosis2", "Necrosis3",
"hyperpl1", "hyperpl2", "hyperpl3", "fibrosis", "hypertrophy1",
"hypertrophy2", "mitosis1", "mitosis2", "mitosis3", "vacuolation1",
"vacuolation2", "vacuolation3"), class = "factor"), value = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = c("0", "1"), class = "factor"),
pathology = c("mitosis", "mitosis", "mitosis", "mitosis",
"mitosis", "mitosis")), row.names = c(13L, 14L, 15L, 55L,
56L, 57L), class = "data.frame")
基因输入
dput(biomarkers$Symbol)
c("PBK", "PLK4", "CDKN3", "CDCA3", "PRC1", "CDK1", "TCF19", "SHCBP1",
"CENPK", "SPC25")
我们可以使用
prova <- lapply(biomarkers$Symbol, function(gene) {
ddat <- subset(degs, Symbol == gene & pathol =="mitosis")
fmla <- reformulate('value', response = 'logFC')
if(nlevels(droplevels(ddat$val)) >=2) {
ttest <- t.test(fmla, data = ddat)
data.frame(pathol = 'mitosis', genes = gene,
p_value = ttest$p.value, t_stat = ttest$statistic)
} else NULL
})
names(prova) <- biomarkers$Symbol
out <- do.call(rbind, prova)
补充你的问题,一次做很多检验的时候记得进行多重假设检验校正。
根据 akrun 的回答,这里是 BH 更正:
ord = order(prova$p.value)
prova$fdr = prova$p.value*nrow(prova)/ord
这将减少误报的数量。