Fisher 在数据框中对 R 进行精确检验
Fisher's exact test in R from dataframe
我有输入数据 (df) 用于为每一行制作 2*2 应急 table。
df <- data.frame(as = c("A", "B", "C", "D"), sum_m = c(47, 8, 93, 73),
length_m = c(150, 150, 150, 150), sum_w = c(66, 183, 44, 113), length_w = c(199, 199, 199, 199),
pooled_p = c(0.32378223495702, 0.547277936962751, 0.392550143266476, 0.532951289398281),
test1 = c(TRUE, TRUE, TRUE, TRUE), test2 = c(TRUE, TRUE, TRUE, TRUE), test3 = c(TRUE, TRUE, TRUE, TRUE),
test4 = c(TRUE, TRUE, TRUE, TRUE), final_test = c(TRUE, TRUE, TRUE, TRUE))
我写了一个小脚本(如下所示)来计算单行的 p 值:
# Chi-square or Fisher's exact test
x <- c(sum_m, sum_w)
n <- c(length_m, length_w)
mash <- rbind(c(sum_m, length_m - sum_m),
c(sum_w, length_w - sum_w))
if(final_test == TRUE){
## With Yate's continuity correction
prop.test(x,n)
#Exactly the same as:
chisq.test(mash)
}else{
# Fisher's exact test
fisher.test(mash)
}
希望这对您有意义。
非常感谢有关如何将其应用于大量行的建议!如果可能,请将 p 值粘贴到最后一列。
提前致谢:X)
我们可以将代码包装到一个函数中,然后使用 rowwise
并应用该函数
library(dplyr)
library(tidyr)
df %>%
rowwise %>%
mutate(out = list(f1(sum_m, sum_w, length_m, length_w, final_test) %>%
broom::tidy(.))) %>%
ungroup %>%
unnest(out)
-输出
# A tibble: 4 × 15
as sum_m length_m sum_w length_w pooled_p test1 test2 test3 test4 final_test statistic p.value parameter method
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <lgl> <lgl> <lgl> <lgl> <lgl> <dbl> <dbl> <int> <chr>
1 A 47 150 66 199 0.324 TRUE TRUE TRUE TRUE TRUE 0.0608 8.05e- 1 1 Pearson's Chi-squared test with…
2 B 8 150 183 199 0.547 TRUE TRUE TRUE TRUE TRUE 256. 1.59e-57 1 Pearson's Chi-squared test with…
3 C 93 150 44 199 0.393 TRUE TRUE TRUE TRUE TRUE 55.4 9.77e-14 1 Pearson's Chi-squared test with…
4 D 73 150 113 199 0.533 TRUE TRUE TRUE TRUE TRUE 1.95 1.63e- 1 1 Pearson's Chi-squared test with…
使用 pmap
可能比 rowwise
更快
library(purrr)
df %>%
mutate(out = pmap(across(c(sum_m, sum_w, length_m, length_w, final_test)),
~ f1(..1, ..2, ..3, ..4, ..5) %>%
broom::tidy(.))) %>%
unnest(out)
-输出
# A tibble: 4 × 15
as sum_m length_m sum_w length_w pooled_p test1 test2 test3 test4 final_test statistic p.value parameter method
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <lgl> <lgl> <lgl> <lgl> <lgl> <dbl> <dbl> <int> <chr>
1 A 47 150 66 199 0.324 TRUE TRUE TRUE TRUE TRUE 0.0608 8.05e- 1 1 Pearson's Chi-squared test with…
2 B 8 150 183 199 0.547 TRUE TRUE TRUE TRUE TRUE 256. 1.59e-57 1 Pearson's Chi-squared test with…
3 C 93 150 44 199 0.393 TRUE TRUE TRUE TRUE TRUE 55.4 9.77e-14 1 Pearson's Chi-squared test with…
4 D 73 150 113 199 0.533 TRUE TRUE TRUE TRUE TRUE 1.95 1.63e- 1 1 Pearson's Chi-squared test with…
-函数
f1 <- function(sum_m, sum_w, length_m, length_w, final_test) {
x <- c(sum_m, sum_w)
n <- c(length_m, length_w)
mash <- rbind(c(sum_m, length_m - sum_m),
c(sum_w, length_w - sum_w))
if(final_test == TRUE){
## With Yate's continuity correction
prop.test(x,n)
#Exactly the same as:
chisq.test(mash)
}else{
# Fisher's exact test
fisher.test(mash)
}
}
我建议您在需要按行或按列应用函数时使用函数。
如果您不想使用 dyplr,R 基础解决方案:
test = function(x1,x2,y1,y2,test){
mash = rbind(c(x1, x2 - x1),
c(y1, y2 - y1))
if(test){
res = chisq.test(mash)
}
else{
res = fisher.test(mash)
}
return(res$p.value)
}
mapply(FUN = test,
df$sum_m, df$length_m, df$sum_w, df$length_w, df$final_test)
结果:
mapply(FUN = test,df$sum_m,df$length_m,df$sum_w,df$length_w,df$final_test)
[1] 8.051833e-01 1.590633e-57 9.772551e-14 1.626199e-01
我有输入数据 (df) 用于为每一行制作 2*2 应急 table。
df <- data.frame(as = c("A", "B", "C", "D"), sum_m = c(47, 8, 93, 73),
length_m = c(150, 150, 150, 150), sum_w = c(66, 183, 44, 113), length_w = c(199, 199, 199, 199),
pooled_p = c(0.32378223495702, 0.547277936962751, 0.392550143266476, 0.532951289398281),
test1 = c(TRUE, TRUE, TRUE, TRUE), test2 = c(TRUE, TRUE, TRUE, TRUE), test3 = c(TRUE, TRUE, TRUE, TRUE),
test4 = c(TRUE, TRUE, TRUE, TRUE), final_test = c(TRUE, TRUE, TRUE, TRUE))
我写了一个小脚本(如下所示)来计算单行的 p 值:
# Chi-square or Fisher's exact test
x <- c(sum_m, sum_w)
n <- c(length_m, length_w)
mash <- rbind(c(sum_m, length_m - sum_m),
c(sum_w, length_w - sum_w))
if(final_test == TRUE){
## With Yate's continuity correction
prop.test(x,n)
#Exactly the same as:
chisq.test(mash)
}else{
# Fisher's exact test
fisher.test(mash)
}
希望这对您有意义。
非常感谢有关如何将其应用于大量行的建议!如果可能,请将 p 值粘贴到最后一列。
提前致谢:X)
我们可以将代码包装到一个函数中,然后使用 rowwise
并应用该函数
library(dplyr)
library(tidyr)
df %>%
rowwise %>%
mutate(out = list(f1(sum_m, sum_w, length_m, length_w, final_test) %>%
broom::tidy(.))) %>%
ungroup %>%
unnest(out)
-输出
# A tibble: 4 × 15
as sum_m length_m sum_w length_w pooled_p test1 test2 test3 test4 final_test statistic p.value parameter method
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <lgl> <lgl> <lgl> <lgl> <lgl> <dbl> <dbl> <int> <chr>
1 A 47 150 66 199 0.324 TRUE TRUE TRUE TRUE TRUE 0.0608 8.05e- 1 1 Pearson's Chi-squared test with…
2 B 8 150 183 199 0.547 TRUE TRUE TRUE TRUE TRUE 256. 1.59e-57 1 Pearson's Chi-squared test with…
3 C 93 150 44 199 0.393 TRUE TRUE TRUE TRUE TRUE 55.4 9.77e-14 1 Pearson's Chi-squared test with…
4 D 73 150 113 199 0.533 TRUE TRUE TRUE TRUE TRUE 1.95 1.63e- 1 1 Pearson's Chi-squared test with…
使用 pmap
可能比 rowwise
library(purrr)
df %>%
mutate(out = pmap(across(c(sum_m, sum_w, length_m, length_w, final_test)),
~ f1(..1, ..2, ..3, ..4, ..5) %>%
broom::tidy(.))) %>%
unnest(out)
-输出
# A tibble: 4 × 15
as sum_m length_m sum_w length_w pooled_p test1 test2 test3 test4 final_test statistic p.value parameter method
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <lgl> <lgl> <lgl> <lgl> <lgl> <dbl> <dbl> <int> <chr>
1 A 47 150 66 199 0.324 TRUE TRUE TRUE TRUE TRUE 0.0608 8.05e- 1 1 Pearson's Chi-squared test with…
2 B 8 150 183 199 0.547 TRUE TRUE TRUE TRUE TRUE 256. 1.59e-57 1 Pearson's Chi-squared test with…
3 C 93 150 44 199 0.393 TRUE TRUE TRUE TRUE TRUE 55.4 9.77e-14 1 Pearson's Chi-squared test with…
4 D 73 150 113 199 0.533 TRUE TRUE TRUE TRUE TRUE 1.95 1.63e- 1 1 Pearson's Chi-squared test with…
-函数
f1 <- function(sum_m, sum_w, length_m, length_w, final_test) {
x <- c(sum_m, sum_w)
n <- c(length_m, length_w)
mash <- rbind(c(sum_m, length_m - sum_m),
c(sum_w, length_w - sum_w))
if(final_test == TRUE){
## With Yate's continuity correction
prop.test(x,n)
#Exactly the same as:
chisq.test(mash)
}else{
# Fisher's exact test
fisher.test(mash)
}
}
我建议您在需要按行或按列应用函数时使用函数。
如果您不想使用 dyplr,R 基础解决方案:
test = function(x1,x2,y1,y2,test){
mash = rbind(c(x1, x2 - x1),
c(y1, y2 - y1))
if(test){
res = chisq.test(mash)
}
else{
res = fisher.test(mash)
}
return(res$p.value)
}
mapply(FUN = test,
df$sum_m, df$length_m, df$sum_w, df$length_w, df$final_test)
结果:
mapply(FUN = test,df$sum_m,df$length_m,df$sum_w,df$length_w,df$final_test)
[1] 8.051833e-01 1.590633e-57 9.772551e-14 1.626199e-01