通过排列进行多组测试
Multiple groups tests via permutation
我有一个 df,其中包含与两个实验相关的两组值(value_1 和 value_2)。
一个实验包含两组(0 和 1),另一个包含三个组(0,1,2)。
test group Value_1 Value_2
AA 0 15.1 11.2
AA 0 12.4 8.6
AA 1 9.6 22.5
AA 1 10.2 22
BB 0 12.11 11
BB 0 14 1.2
BB 1 11 13.2
BB 1 12.3 9
BB 2 14.2 12
BB 2 15 13
df <- structure(list(test = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L), .Label = c("AA", "BB"), class = "factor"), group = c(0L,
0L, 1L, 1L, 0L, 0L, 1L, 1L, 2L, 2L), Value_1 = c(15.1, 12.4,
9.6, 10.2, 12.11, 14, 11, 12.3, 14.2, 15), Value_2 = c(11.2,
8.6, 22.5, 22, 11, 1.2, 13.2, 9, 12, 13)), .Names = c("test",
"group", "Value_1", "Value_2"), class = "data.frame", row.names = c(NA,
-10L))
我想对 value_1 和 value_2 应用置换测试 - 按测试,按组 - 覆盖:
- 两组情况(0 vs 1)和
- 多于 2 个案例(0 对 1、0 对 2、1 对 2)。
到目前为止我所做的 - 分成几块是:
当组只有两个时,我简单地应用oneway.test():
df %>%
filter(test %in% 'AA') -> df_test_aa
df_test_aa_value_1 <- oneway.test(df_test_aa$Value_1~df_test_aa$group)
df_test_aa_value_1$p.value
[1] 0.2011234
df_test_aa_value_2 <- oneway.test(df_test_aa$Value_2~df_test_aa$group)
df_test_aa_value_2$p.value
[1] 0.05854026
每当组数超过 2 个时,我会测试所有可能的排列:
前 0 对 1:
df %>% filter(test %in% 'BB' & group %in% c(0,1)) -> df_test_bb_01
df_test_bb_01_value_1 <- oneway.test(df_test_bb_01$Value_1~df_test_bb_01$group)
df_test_bb_01_value_1$p.value
[1] 0.3585415
df_test_bb_01_value_2 <- oneway.test(df_test_bb_01$Value_2~df_test_bb_01$group)
df_test_bb_01_value_2$p.value
[1] 0.4848446
然后 0 对 2:
df %>%
filter(test %in% 'BB' & group %in% c(0,2)) -> df_test_bb_02
df_test_bb_02_value_1 <- oneway.test(df_test_bb_02$Value_1~df_test_bb_02$group)
df_test_bb_02_value_1$p.value
[1] 0.3246012
df_test_bb_02_value_2 <- oneway.test(df_test_bb_02$Value_2~df_test_bb_02$group)
df_test_bb_02_value_2$p.value
[1] 0.4142838
然后 1 对 2:
df %>%
filter(test %in% 'BB' & group %in% c(1,2)) -> df_test_bb_12
df_test_bb_12_value_1 <- oneway.test(df_test_bb_12$Value_1~df_test_bb_12$group)
df_test_bb_12_value_1$p.value
[1] 0.08105404
df_test_bb_12_value_2 <- oneway.test(df_test_bb_12$Value_2~df_test_bb_12$group)
df_test_bb_12_value_2$p.value
[1] 0.6245713
因此,我希望获得如下所示的 df:
test value p_value_2sided hypothesis
AA Value_1 0.201 0,1
AA Value_2 0.059 0,1
BB Value_1 0.359 0,1
BB Value_1 0.325 0,2
BB Value_1 0.081 1,2
BB Value_2 0.485 0,1
BB Value_2 0.414 0,2
BB Value_2 0.625 1,2
感谢您的提示!
嗯,这不是很漂亮,但是......
df <- structure(list(test = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L), .Label = c("AA", "BB"), class = "factor"), group = c(0L,
0L, 1L, 1L, 0L, 0L, 1L, 1L, 2L, 2L), Value_1 = c(15.1, 12.4,
9.6, 10.2, 12.11, 14, 11, 12.3, 14.2, 15), Value_2 = c(11.2,
8.6, 22.5, 22, 11, 1.2, 13.2, 9, 12, 13)), .Names = c("test",
"group", "Value_1", "Value_2"), class = "data.frame", row.names = c(NA,
-10L))
as.data.frame(
do.call(rbind, by(df, factor(df$test), function(x) {
h <- t(combn(unique(x$group), 2))
p <- apply(h, 1, function(y) {
with(x[x$group %in% y, ], {
c(oneway.test(Value_1 ~ group)$p.value,
oneway.test(Value_2 ~ group)$p.value)
})
})
h <- rep(paste(h[, 1], h[, 2], sep = ","), each = 2)
cbind(test = as.character(x$test[1]), value = c("Value_1", "Value_2"), p_value = as.vector(p), hypothesis = h)
}))
)
test value p_value hypothesis
1 AA Value_1 0.201123366107666 0,1
2 AA Value_2 0.0585402590546805 0,1
3 BB Value_1 0.358541470571387 0,1
4 BB Value_2 0.484844587956832 0,1
5 BB Value_1 0.324601180998953 0,2
6 BB Value_2 0.414283756097153 0,2
7 BB Value_1 0.0810540380817137 1,2
8 BB Value_2 0.624571310834221 1,2
df <- structure(list(test = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L), .Label = c("AA", "BB"), class = "factor"), group = c(0L,
0L, 1L, 1L, 0L, 0L, 1L, 1L, 2L, 2L), Value_1 = c(15.1, 12.4,
9.6, 10.2, 12.11, 14, 11, 12.3, 14.2, 15), Value_2 = c(11.2,
8.6, 22.5, 22, 11, 1.2, 13.2, 9, 12, 13)), .Names = c("test",
"group", "Value_1", "Value_2"), class = "data.frame", row.names = c(NA, -10L))
library(tidyverse)
# reshape dataset
df2 = df %>% gather(value, v, -test, -group)
# function to compute p value
# vectorized version
f = function(t,val,x1,x2) {
(df2 %>%
filter(test == t & value == val & group %in% c(x1,x2)) %>%
oneway.test(v~group, data = .))$p.value }
f = Vectorize(f)
df2 %>%
distinct(test, group, value) %>% # get unique combinations
group_by(test, value) %>% # for each test and value
nest() %>% # nest rest of columns
mutate(d = map(data, ~data.frame(t(combn(.$group, 2)))),
hypothesis = map(d, ~paste0(.$X1, ",", .$X2))) %>% # get pairs/combinations of values
unnest(d, hypothesis) %>% # unnest data
mutate(pval = f(test, value, X1, X2)) # apply vectorised function to get p value
# # A tibble: 8 x 6
# test value hypothesis X1 X2 pval
# <fctr> <chr> <chr> <int> <int> <dbl>
# 1 AA Value_1 0,1 0 1 0.201
# 2 BB Value_1 0,1 0 1 0.359
# 3 BB Value_1 0,2 0 2 0.325
# 4 BB Value_1 1,2 1 2 0.0811
# 5 AA Value_2 0,1 0 1 0.0585
# 6 BB Value_2 0,1 0 1 0.485
# 7 BB Value_2 0,2 0 2 0.414
# 8 BB Value_2 1,2 1 2 0.625
如果确实不需要X1
和X2
,可以删除它们。
但是,通过这种方式,您(也)可以将它们作为单独的数字变量,以防您想在分析的后期阶段在另一个过程中使用它们(例如,对特定组进行过滤)。
我有一个 df,其中包含与两个实验相关的两组值(value_1 和 value_2)。
一个实验包含两组(0 和 1),另一个包含三个组(0,1,2)。
test group Value_1 Value_2
AA 0 15.1 11.2
AA 0 12.4 8.6
AA 1 9.6 22.5
AA 1 10.2 22
BB 0 12.11 11
BB 0 14 1.2
BB 1 11 13.2
BB 1 12.3 9
BB 2 14.2 12
BB 2 15 13
df <- structure(list(test = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L), .Label = c("AA", "BB"), class = "factor"), group = c(0L,
0L, 1L, 1L, 0L, 0L, 1L, 1L, 2L, 2L), Value_1 = c(15.1, 12.4,
9.6, 10.2, 12.11, 14, 11, 12.3, 14.2, 15), Value_2 = c(11.2,
8.6, 22.5, 22, 11, 1.2, 13.2, 9, 12, 13)), .Names = c("test",
"group", "Value_1", "Value_2"), class = "data.frame", row.names = c(NA,
-10L))
我想对 value_1 和 value_2 应用置换测试 - 按测试,按组 - 覆盖:
- 两组情况(0 vs 1)和
- 多于 2 个案例(0 对 1、0 对 2、1 对 2)。
到目前为止我所做的 - 分成几块是:
当组只有两个时,我简单地应用oneway.test():
df %>% filter(test %in% 'AA') -> df_test_aa df_test_aa_value_1 <- oneway.test(df_test_aa$Value_1~df_test_aa$group) df_test_aa_value_1$p.value [1] 0.2011234 df_test_aa_value_2 <- oneway.test(df_test_aa$Value_2~df_test_aa$group) df_test_aa_value_2$p.value [1] 0.05854026
每当组数超过 2 个时,我会测试所有可能的排列:
前 0 对 1:
df %>% filter(test %in% 'BB' & group %in% c(0,1)) -> df_test_bb_01 df_test_bb_01_value_1 <- oneway.test(df_test_bb_01$Value_1~df_test_bb_01$group) df_test_bb_01_value_1$p.value [1] 0.3585415 df_test_bb_01_value_2 <- oneway.test(df_test_bb_01$Value_2~df_test_bb_01$group) df_test_bb_01_value_2$p.value [1] 0.4848446
然后 0 对 2:
df %>% filter(test %in% 'BB' & group %in% c(0,2)) -> df_test_bb_02 df_test_bb_02_value_1 <- oneway.test(df_test_bb_02$Value_1~df_test_bb_02$group) df_test_bb_02_value_1$p.value [1] 0.3246012 df_test_bb_02_value_2 <- oneway.test(df_test_bb_02$Value_2~df_test_bb_02$group) df_test_bb_02_value_2$p.value [1] 0.4142838
然后 1 对 2:
df %>% filter(test %in% 'BB' & group %in% c(1,2)) -> df_test_bb_12 df_test_bb_12_value_1 <- oneway.test(df_test_bb_12$Value_1~df_test_bb_12$group) df_test_bb_12_value_1$p.value [1] 0.08105404 df_test_bb_12_value_2 <- oneway.test(df_test_bb_12$Value_2~df_test_bb_12$group) df_test_bb_12_value_2$p.value [1] 0.6245713
因此,我希望获得如下所示的 df:
test value p_value_2sided hypothesis
AA Value_1 0.201 0,1
AA Value_2 0.059 0,1
BB Value_1 0.359 0,1
BB Value_1 0.325 0,2
BB Value_1 0.081 1,2
BB Value_2 0.485 0,1
BB Value_2 0.414 0,2
BB Value_2 0.625 1,2
感谢您的提示!
嗯,这不是很漂亮,但是......
df <- structure(list(test = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L), .Label = c("AA", "BB"), class = "factor"), group = c(0L,
0L, 1L, 1L, 0L, 0L, 1L, 1L, 2L, 2L), Value_1 = c(15.1, 12.4,
9.6, 10.2, 12.11, 14, 11, 12.3, 14.2, 15), Value_2 = c(11.2,
8.6, 22.5, 22, 11, 1.2, 13.2, 9, 12, 13)), .Names = c("test",
"group", "Value_1", "Value_2"), class = "data.frame", row.names = c(NA,
-10L))
as.data.frame(
do.call(rbind, by(df, factor(df$test), function(x) {
h <- t(combn(unique(x$group), 2))
p <- apply(h, 1, function(y) {
with(x[x$group %in% y, ], {
c(oneway.test(Value_1 ~ group)$p.value,
oneway.test(Value_2 ~ group)$p.value)
})
})
h <- rep(paste(h[, 1], h[, 2], sep = ","), each = 2)
cbind(test = as.character(x$test[1]), value = c("Value_1", "Value_2"), p_value = as.vector(p), hypothesis = h)
}))
)
test value p_value hypothesis
1 AA Value_1 0.201123366107666 0,1
2 AA Value_2 0.0585402590546805 0,1
3 BB Value_1 0.358541470571387 0,1
4 BB Value_2 0.484844587956832 0,1
5 BB Value_1 0.324601180998953 0,2
6 BB Value_2 0.414283756097153 0,2
7 BB Value_1 0.0810540380817137 1,2
8 BB Value_2 0.624571310834221 1,2
df <- structure(list(test = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L), .Label = c("AA", "BB"), class = "factor"), group = c(0L,
0L, 1L, 1L, 0L, 0L, 1L, 1L, 2L, 2L), Value_1 = c(15.1, 12.4,
9.6, 10.2, 12.11, 14, 11, 12.3, 14.2, 15), Value_2 = c(11.2,
8.6, 22.5, 22, 11, 1.2, 13.2, 9, 12, 13)), .Names = c("test",
"group", "Value_1", "Value_2"), class = "data.frame", row.names = c(NA, -10L))
library(tidyverse)
# reshape dataset
df2 = df %>% gather(value, v, -test, -group)
# function to compute p value
# vectorized version
f = function(t,val,x1,x2) {
(df2 %>%
filter(test == t & value == val & group %in% c(x1,x2)) %>%
oneway.test(v~group, data = .))$p.value }
f = Vectorize(f)
df2 %>%
distinct(test, group, value) %>% # get unique combinations
group_by(test, value) %>% # for each test and value
nest() %>% # nest rest of columns
mutate(d = map(data, ~data.frame(t(combn(.$group, 2)))),
hypothesis = map(d, ~paste0(.$X1, ",", .$X2))) %>% # get pairs/combinations of values
unnest(d, hypothesis) %>% # unnest data
mutate(pval = f(test, value, X1, X2)) # apply vectorised function to get p value
# # A tibble: 8 x 6
# test value hypothesis X1 X2 pval
# <fctr> <chr> <chr> <int> <int> <dbl>
# 1 AA Value_1 0,1 0 1 0.201
# 2 BB Value_1 0,1 0 1 0.359
# 3 BB Value_1 0,2 0 2 0.325
# 4 BB Value_1 1,2 1 2 0.0811
# 5 AA Value_2 0,1 0 1 0.0585
# 6 BB Value_2 0,1 0 1 0.485
# 7 BB Value_2 0,2 0 2 0.414
# 8 BB Value_2 1,2 1 2 0.625
如果确实不需要X1
和X2
,可以删除它们。
但是,通过这种方式,您(也)可以将它们作为单独的数字变量,以防您想在分析的后期阶段在另一个过程中使用它们(例如,对特定组进行过滤)。