R:每个因子水平内的行之间的 t 检验
R: t-test between rows within each factor level
这是我要处理的数据框:
m <- matrix(rnorm(108, mean = 5000, sd = 1000), nrow = 36)
colnames(m) <- paste('V', 1:3, sep = '')
df <- data.frame(type = factor(rep(c('T1', 'T2', 'T3', 'T4', 'T5',
'T6', 'T7', 'T8', 'T9'), each = 4)),
treatment = factor(rep(rep(c('C','P', 'N', 'S'), each = 1),
9)),
as.data.frame(m))
我想知道如何在每个 "type" 的行之间执行 t 检验。这是我想要的类型 T1 的 t 检验示例:
t.test(df[1,3:5], df[2, 3:5])
t.test(df[1,3:5], df[3, 3:5])
t.test(df[1,3:5], df[4, 3:5])
t.test(df[1,3:5], df[3, 3:5])
t.test(df[1,3:5], df[4, 3:5])
我想弄清楚如何遍历所有行并从 t 检验中获取所有 p 值(以及用于识别的类型和处理),而不是手动计算每一行。任何帮助或建议将不胜感激。
像这样:
library(dplyr)
t_tests = df %>%
split(.$type) %>%
lapply(function(x){
t(x[3:5]) %>%
data.frame %>%
setNames(x$treatment) %>%
combn(2, simplify = FALSE) %>%
lapply(function(x){
data.frame(treatment = paste0(names(x), collapse = ", "),
p_value = t.test(x[,1], x[,2])$p.value)
}) %>%
do.call(rbind, .)
}) %>%
do.call(rbind, .) %>%
mutate(type = sub("[.].+", "", row.names(.)))
结果:
> head(t_tests, 10)
treatment p_value type
1 C, P 0.6112274 T1
2 C, N 0.6630060 T1
3 C, S 0.5945135 T1
4 P, N 0.9388568 T1
5 P, S 0.8349370 T1
6 N, S 0.9049995 T1
7 C, P 0.3274583 T2
8 C, N 0.9755364 T2
9 C, S 0.7391661 T2
10 P, N 0.3177871 T2
编辑(向数据集添加了额外的级别 "file"):
library(dplyr)
t_tests = df %>%
split(.$file) %>%
lapply(function(y){
split(y, y$type) %>%
lapply(function(x){
t(x[4:6]) %>%
data.frame %>%
setNames(x$treatment) %>%
combn(2, simplify = FALSE) %>%
lapply(function(x){
data.frame(treatment = paste0(names(x), collapse = ", "),
p_value = t.test(x[,1], x[,2])$p.value)
}) %>%
do.call(rbind, .)
}) %>%
do.call(rbind, .) %>%
mutate(type = sub("[.].+", "", row.names(.)))
}) %>%
do.call(rbind, .) %>%
mutate(file = sub("[.].+", "", row.names(.)))
结果:
treatment p_value type file
1 C, P 0.3903450 T1 file1
2 C, N 0.3288727 T1 file1
3 C, S 0.0638599 T1 file1
4 P, N 0.6927599 T1 file1
5 P, S 0.1159615 T1 file1
6 N, S 0.2184015 T1 file1
7 C, P 0.1147805 T2 file1
8 C, N 0.4961888 T2 file1
9 C, S 0.9048607 T2 file1
10 P, N 0.4203666 T2 file1
11 P, S 0.3425908 T2 file1
12 N, S 0.7262478 T2 file1
13 C, P 0.6300293 T3 file1
14 C, N 0.8255837 T3 file1
15 C, S 0.7140522 T3 file1
16 P, N 0.4768694 T3 file1
17 P, S 0.3992130 T3 file1
18 N, S 0.8740219 T3 file1
19 C, P 0.2434270 T4 file1
20 C, N 0.2713622 T4 file1
编辑注意事项:
OP 希望将额外的顶级 file
添加到数据中,可以简单地在末尾添加另一个 split
+ lapply
和 do.call
。
新数据:
m <- matrix(rnorm(324, mean = 5000, sd = 1000), nrow = 108)
colnames(m) <- paste('V', 1:3, sep = '')
df <- data.frame(type = factor(rep(c('T1', 'T2', 'T3', 'T4', 'T5', 'T6', 'T7', 'T8', 'T9'), each = 4)),
treatment = factor(rep(rep(c('C','P', 'N', 'S'), each = 1), 9)),
file = factor(rep(c("file1", "file2", "file3"), each = 36)),
as.data.frame(m))
这是我要处理的数据框:
m <- matrix(rnorm(108, mean = 5000, sd = 1000), nrow = 36)
colnames(m) <- paste('V', 1:3, sep = '')
df <- data.frame(type = factor(rep(c('T1', 'T2', 'T3', 'T4', 'T5',
'T6', 'T7', 'T8', 'T9'), each = 4)),
treatment = factor(rep(rep(c('C','P', 'N', 'S'), each = 1),
9)),
as.data.frame(m))
我想知道如何在每个 "type" 的行之间执行 t 检验。这是我想要的类型 T1 的 t 检验示例:
t.test(df[1,3:5], df[2, 3:5])
t.test(df[1,3:5], df[3, 3:5])
t.test(df[1,3:5], df[4, 3:5])
t.test(df[1,3:5], df[3, 3:5])
t.test(df[1,3:5], df[4, 3:5])
我想弄清楚如何遍历所有行并从 t 检验中获取所有 p 值(以及用于识别的类型和处理),而不是手动计算每一行。任何帮助或建议将不胜感激。
像这样:
library(dplyr)
t_tests = df %>%
split(.$type) %>%
lapply(function(x){
t(x[3:5]) %>%
data.frame %>%
setNames(x$treatment) %>%
combn(2, simplify = FALSE) %>%
lapply(function(x){
data.frame(treatment = paste0(names(x), collapse = ", "),
p_value = t.test(x[,1], x[,2])$p.value)
}) %>%
do.call(rbind, .)
}) %>%
do.call(rbind, .) %>%
mutate(type = sub("[.].+", "", row.names(.)))
结果:
> head(t_tests, 10)
treatment p_value type
1 C, P 0.6112274 T1
2 C, N 0.6630060 T1
3 C, S 0.5945135 T1
4 P, N 0.9388568 T1
5 P, S 0.8349370 T1
6 N, S 0.9049995 T1
7 C, P 0.3274583 T2
8 C, N 0.9755364 T2
9 C, S 0.7391661 T2
10 P, N 0.3177871 T2
编辑(向数据集添加了额外的级别 "file"):
library(dplyr)
t_tests = df %>%
split(.$file) %>%
lapply(function(y){
split(y, y$type) %>%
lapply(function(x){
t(x[4:6]) %>%
data.frame %>%
setNames(x$treatment) %>%
combn(2, simplify = FALSE) %>%
lapply(function(x){
data.frame(treatment = paste0(names(x), collapse = ", "),
p_value = t.test(x[,1], x[,2])$p.value)
}) %>%
do.call(rbind, .)
}) %>%
do.call(rbind, .) %>%
mutate(type = sub("[.].+", "", row.names(.)))
}) %>%
do.call(rbind, .) %>%
mutate(file = sub("[.].+", "", row.names(.)))
结果:
treatment p_value type file
1 C, P 0.3903450 T1 file1
2 C, N 0.3288727 T1 file1
3 C, S 0.0638599 T1 file1
4 P, N 0.6927599 T1 file1
5 P, S 0.1159615 T1 file1
6 N, S 0.2184015 T1 file1
7 C, P 0.1147805 T2 file1
8 C, N 0.4961888 T2 file1
9 C, S 0.9048607 T2 file1
10 P, N 0.4203666 T2 file1
11 P, S 0.3425908 T2 file1
12 N, S 0.7262478 T2 file1
13 C, P 0.6300293 T3 file1
14 C, N 0.8255837 T3 file1
15 C, S 0.7140522 T3 file1
16 P, N 0.4768694 T3 file1
17 P, S 0.3992130 T3 file1
18 N, S 0.8740219 T3 file1
19 C, P 0.2434270 T4 file1
20 C, N 0.2713622 T4 file1
编辑注意事项:
OP 希望将额外的顶级 file
添加到数据中,可以简单地在末尾添加另一个 split
+ lapply
和 do.call
。
新数据:
m <- matrix(rnorm(324, mean = 5000, sd = 1000), nrow = 108)
colnames(m) <- paste('V', 1:3, sep = '')
df <- data.frame(type = factor(rep(c('T1', 'T2', 'T3', 'T4', 'T5', 'T6', 'T7', 'T8', 'T9'), each = 4)),
treatment = factor(rep(rep(c('C','P', 'N', 'S'), each = 1), 9)),
file = factor(rep(c("file1", "file2", "file3"), each = 36)),
as.data.frame(m))