在 r 中循环 binom.test()
loop for binom.test() in r
我有一个包含成功次数、概率和样本量的数据集,需要对其进行二项式检验。
这是数据样本(请注意,实际数据集需要我运行 100 多次二项式检验):
km n_1 prey_pred p0_prey_pred
<fct> <dbl> <int> <dbl>
80 93 12 0.119
81 1541 103 0.0793
83 316 5 0.0364
84 721 44 0.0796
89 866 58 0.131
我通常运行这个(第一行的例子):
# Single-row example: exact two-sided binomial test for the first data row.
n <- 93          # number of trials
p0 <- 0.119      # hypothesised probability of success
successes <- 12  # observed number of successes
# The observed count must be passed via `successes`, the variable defined
# above; `obs.successes` does not exist in this snippet.
binom.test(successes, n, p0, alternative = "two.sided")
> Exact binomial test
data: 12 and 93
number of successes = 12, number of trials = 93, p-value = 0.74822
alternative hypothesis: true probability of success is not equal to 0.119
95 percent confidence interval:
0.068487201 0.214548325
sample estimates:
probability of success
0.12903226
有没有办法系统地对每一行数据运行二项式检验(即一次完成多个二项式检验),然后将所有输出(p 值、置信区间、成功概率)存储为单独的列?
我已经尝试了 here 提出的解决方案,但我显然 m
您可以按照评论中的建议为此定义一个函数:
# Run an exact binomial test and collect its key results in a one-row
# data frame.
#
# Arguments:
#   x  number of successes
#   n  number of trials
#   p  hypothesised probability of success
#
# Returns a data.frame with the columns `p` (p-value), `lower_ci` and
# `upper_ci` (confidence interval limits) and `p_success` (estimated
# probability of success).
my_binom <- function(x, n, p) {
  fit <- binom.test(x, n, p)
  ci <- fit$conf.int
  out <- data.frame(
    p = fit$p.value,
    lower_ci = ci[1],
    upper_ci = ci[2],
    p_success = fit$estimate
  )
  # The estimate is a named vector, which would otherwise become the row name.
  rownames(out) <- NULL
  out
}
然后你可以为每一行应用它
# Apply my_binom() to every row of `df` (the data frame holding the columns
# prey_pred, n_1 and p0_prey_pred) and stack the one-row results into a
# single data frame with one row per test.
#
# Map() walks the three columns in parallel, so every value keeps its own
# type. apply(df, 1, ...) would first convert the data frame to a matrix; a
# factor or character column such as `km` then turns every value into a
# string, which binom.test() cannot use.
do.call(
  "rbind.data.frame",
  Map(my_binom, x = df$prey_pred, n = df$n_1, p = df$p0_prey_pred)
)
使用 apply:
# For every row of `dat`, run a two-sided exact binomial test and append the
# confidence limits, the estimate and the p-value to the row's own values.
# Columns are addressed by position: 2 = trials, 3 = successes,
# 4 = hypothesised probability.
res <- apply(dat, 1, FUN = function(row) {
  bt <- binom.test(row[3], row[2], row[4], "two.sided")
  ci <- bt$conf.int
  c(row,
    "2.5%" = ci[1],
    estimate = unname(bt$estimate),
    "97.5%" = ci[2],
    p.value = unname(bt$p.value))
})
# apply() yields one column per input row: label the columns with `km`, then
# transpose so that each test becomes a row.
colnames(res) <- dat$km
res <- t(res)
res
# km n_1 prey_pred p0_prey_pred 2.5% estimate 97.5% p.value
# 80 80 93 12 0.1190 0.068487201 0.12903226 0.21454832 7.482160e-01
# 81 81 1541 103 0.0793 0.054881013 0.06683971 0.08047927 7.307921e-02
# 83 83 316 5 0.0364 0.005157062 0.01582278 0.03653685 4.960168e-02
# 84 84 721 44 0.0796 0.044688325 0.06102635 0.08106220 7.311463e-02
# 89 89 866 58 0.1310 0.051245893 0.06697460 0.08572304 1.656621e-09
编辑
如果您有多组列,并且数据采用宽格式(出于某种原因想保持这种格式):
# Build a wide example: the three measurement columns of `dat` appear twice,
# distinguished by the suffixes ".1" and ".2".
dat2 <- cbind(dat, dat[-1])
colnames(dat2) <- c(
  "km",
  "n_1.1", "prey_pred.1", "p0_prey_pred.1",
  "n_1.2", "prey_pred.2", "p0_prey_pred.2"
)
# Show the first three rows.
dat2[1:3, ]
# km n_1.1 prey_pred.1 p0_prey_pred.1 n_1.2 prey_pred.2 p0_prey_pred.2
# 1 80 93 12 0.1190 93 12 0.1190
# 2 81 1541 103 0.0793 1541 103 0.0793
# 3 83 316 5 0.0364 316 5 0.0364
你可以这样做:
# For every row of `dat2`, run one two-sided exact binomial test per column
# set and append both result sets to the row's own values.
# The results of the first set carry the suffix ".1", those of the second set
# the suffix ".2", so that all column names of `res2` are unique.
res2 <- t(`colnames<-`(apply(dat2, 1, FUN = function(x) {
  # Column set 1: successes, trials, hypothesised probability.
  rr1 <- binom.test(x["prey_pred.1"], x["n_1.1"], x["p0_prey_pred.1"],
                    "two.sided")
  # Column set 2.
  rr2 <- binom.test(x["prey_pred.2"], x["n_1.2"], x["p0_prey_pred.2"],
                    "two.sided")
  rrr1 <- with(rr1, c("2.5%.1" = conf.int[1], estimate.1 = unname(estimate),
                      "97.5%.1" = conf.int[2], p.value.1 = unname(p.value)))
  rrr2 <- with(rr2, c("2.5%.2" = conf.int[1], estimate.2 = unname(estimate),
                      "97.5%.2" = conf.int[2], p.value.2 = unname(p.value)))
  c(x, rrr1, rrr2)
  # apply() yields one column per input row: label with `km`, then transpose.
}), dat2$km))
res2
# km n_1.1 prey_pred.1 p0_prey_pred.1 n_1.2 prey_pred.2 p0_prey_pred.2 2.5%.1
# 80 80 93 12 0.1190 93 12 0.1190 0.068487201
# 81 81 1541 103 0.0793 1541 103 0.0793 0.054881013
# 83 83 316 5 0.0364 316 5 0.0364 0.005157062
# 84 84 721 44 0.0796 721 44 0.0796 0.044688325
# 89 89 866 58 0.1310 866 58 0.1310 0.051245893
# estimate.1 97.5%.1 p.value.1 2.5%.2 estimate.2 97.5%.2 p.value.2
# 80 0.12903226 0.21454832 7.482160e-01 0.068487201 0.12903226 0.21454832 7.482160e-01
# 81 0.06683971 0.08047927 7.307921e-02 0.054881013 0.06683971 0.08047927 7.307921e-02
# 83 0.01582278 0.03653685 4.960168e-02 0.005157062 0.01582278 0.03653685 4.960168e-02
# 84 0.06102635 0.08106220 7.311463e-02 0.044688325 0.06102635 0.08106220 7.311463e-02
# 89 0.06697460 0.08572304 1.656621e-09 0.051245893 0.06697460 0.08572304 1.656621e-09
当然可以写出嵌套层次更深的代码,但我建议保持简单,以便日后其他人(也可能包括您自己)更容易理解代码在做什么。
数据:
# Example data: one row per test, read from inline text.
# Columns: km (row label), n_1 (number of trials), prey_pred (number of
# successes), p0_prey_pred (hypothesised probability of success).
dat <- read.table(
  header = TRUE,
  text = "km n_1 prey_pred p0_prey_pred
80 93 12 0.119
81 1541 103 0.0793
83 316 5 0.0364
84 721 44 0.0796
89 866 58 0.131 "
)
我有一个包含成功次数、概率和样本量的数据集,需要对其进行二项式检验。
这是数据样本(请注意,实际数据集需要我运行 100 多次二项式检验):
km n_1 prey_pred p0_prey_pred
<fct> <dbl> <int> <dbl>
80 93 12 0.119
81 1541 103 0.0793
83 316 5 0.0364
84 721 44 0.0796
89 866 58 0.131
我通常运行这个(第一行的例子):
# Single-row example: exact two-sided binomial test for the first data row.
n <- 93          # number of trials
p0 <- 0.119      # hypothesised probability of success
successes <- 12  # observed number of successes
# The observed count must be passed via `successes`, the variable defined
# above; `obs.successes` does not exist in this snippet.
binom.test(successes, n, p0, alternative = "two.sided")
> Exact binomial test
data: 12 and 93
number of successes = 12, number of trials = 93, p-value = 0.74822
alternative hypothesis: true probability of success is not equal to 0.119
95 percent confidence interval:
0.068487201 0.214548325
sample estimates:
probability of success
0.12903226
有没有办法系统地对每一行数据运行二项式检验(即一次完成多个二项式检验),然后将所有输出(p 值、置信区间、成功概率)存储为单独的列?
我已经尝试了 here 提出的解决方案,但我显然 m
您可以按照评论中的建议为此定义一个函数:
# Run an exact binomial test and collect its key results in a one-row
# data frame.
#
# Arguments:
#   x  number of successes
#   n  number of trials
#   p  hypothesised probability of success
#
# Returns a data.frame with the columns `p` (p-value), `lower_ci` and
# `upper_ci` (confidence interval limits) and `p_success` (estimated
# probability of success).
my_binom <- function(x, n, p) {
  fit <- binom.test(x, n, p)
  ci <- fit$conf.int
  out <- data.frame(
    p = fit$p.value,
    lower_ci = ci[1],
    upper_ci = ci[2],
    p_success = fit$estimate
  )
  # The estimate is a named vector, which would otherwise become the row name.
  rownames(out) <- NULL
  out
}
然后你可以为每一行应用它
# Apply my_binom() to every row of `df` (the data frame holding the columns
# prey_pred, n_1 and p0_prey_pred) and stack the one-row results into a
# single data frame with one row per test.
#
# Map() walks the three columns in parallel, so every value keeps its own
# type. apply(df, 1, ...) would first convert the data frame to a matrix; a
# factor or character column such as `km` then turns every value into a
# string, which binom.test() cannot use.
do.call(
  "rbind.data.frame",
  Map(my_binom, x = df$prey_pred, n = df$n_1, p = df$p0_prey_pred)
)
使用 apply:
# For every row of `dat`, run a two-sided exact binomial test and append the
# confidence limits, the estimate and the p-value to the row's own values.
# Columns are addressed by position: 2 = trials, 3 = successes,
# 4 = hypothesised probability.
res <- apply(dat, 1, FUN = function(row) {
  bt <- binom.test(row[3], row[2], row[4], "two.sided")
  ci <- bt$conf.int
  c(row,
    "2.5%" = ci[1],
    estimate = unname(bt$estimate),
    "97.5%" = ci[2],
    p.value = unname(bt$p.value))
})
# apply() yields one column per input row: label the columns with `km`, then
# transpose so that each test becomes a row.
colnames(res) <- dat$km
res <- t(res)
res
# km n_1 prey_pred p0_prey_pred 2.5% estimate 97.5% p.value
# 80 80 93 12 0.1190 0.068487201 0.12903226 0.21454832 7.482160e-01
# 81 81 1541 103 0.0793 0.054881013 0.06683971 0.08047927 7.307921e-02
# 83 83 316 5 0.0364 0.005157062 0.01582278 0.03653685 4.960168e-02
# 84 84 721 44 0.0796 0.044688325 0.06102635 0.08106220 7.311463e-02
# 89 89 866 58 0.1310 0.051245893 0.06697460 0.08572304 1.656621e-09
编辑
如果您有多组列,并且数据采用宽格式(出于某种原因想保持这种格式):
# Build a wide example: the three measurement columns of `dat` appear twice,
# distinguished by the suffixes ".1" and ".2".
dat2 <- cbind(dat, dat[-1])
colnames(dat2) <- c(
  "km",
  "n_1.1", "prey_pred.1", "p0_prey_pred.1",
  "n_1.2", "prey_pred.2", "p0_prey_pred.2"
)
# Show the first three rows.
dat2[1:3, ]
# km n_1.1 prey_pred.1 p0_prey_pred.1 n_1.2 prey_pred.2 p0_prey_pred.2
# 1 80 93 12 0.1190 93 12 0.1190
# 2 81 1541 103 0.0793 1541 103 0.0793
# 3 83 316 5 0.0364 316 5 0.0364
你可以这样做:
# For every row of `dat2`, run one two-sided exact binomial test per column
# set and append both result sets to the row's own values.
# The results of the first set carry the suffix ".1", those of the second set
# the suffix ".2", so that all column names of `res2` are unique.
res2 <- t(`colnames<-`(apply(dat2, 1, FUN = function(x) {
  # Column set 1: successes, trials, hypothesised probability.
  rr1 <- binom.test(x["prey_pred.1"], x["n_1.1"], x["p0_prey_pred.1"],
                    "two.sided")
  # Column set 2.
  rr2 <- binom.test(x["prey_pred.2"], x["n_1.2"], x["p0_prey_pred.2"],
                    "two.sided")
  rrr1 <- with(rr1, c("2.5%.1" = conf.int[1], estimate.1 = unname(estimate),
                      "97.5%.1" = conf.int[2], p.value.1 = unname(p.value)))
  rrr2 <- with(rr2, c("2.5%.2" = conf.int[1], estimate.2 = unname(estimate),
                      "97.5%.2" = conf.int[2], p.value.2 = unname(p.value)))
  c(x, rrr1, rrr2)
  # apply() yields one column per input row: label with `km`, then transpose.
}), dat2$km))
res2
# km n_1.1 prey_pred.1 p0_prey_pred.1 n_1.2 prey_pred.2 p0_prey_pred.2 2.5%.1
# 80 80 93 12 0.1190 93 12 0.1190 0.068487201
# 81 81 1541 103 0.0793 1541 103 0.0793 0.054881013
# 83 83 316 5 0.0364 316 5 0.0364 0.005157062
# 84 84 721 44 0.0796 721 44 0.0796 0.044688325
# 89 89 866 58 0.1310 866 58 0.1310 0.051245893
# estimate.1 97.5%.1 p.value.1 2.5%.2 estimate.2 97.5%.2 p.value.2
# 80 0.12903226 0.21454832 7.482160e-01 0.068487201 0.12903226 0.21454832 7.482160e-01
# 81 0.06683971 0.08047927 7.307921e-02 0.054881013 0.06683971 0.08047927 7.307921e-02
# 83 0.01582278 0.03653685 4.960168e-02 0.005157062 0.01582278 0.03653685 4.960168e-02
# 84 0.06102635 0.08106220 7.311463e-02 0.044688325 0.06102635 0.08106220 7.311463e-02
# 89 0.06697460 0.08572304 1.656621e-09 0.051245893 0.06697460 0.08572304 1.656621e-09
当然可以写出嵌套层次更深的代码,但我建议保持简单,以便日后其他人(也可能包括您自己)更容易理解代码在做什么。
数据:
# Example data: one row per test, read from inline text.
# Columns: km (row label), n_1 (number of trials), prey_pred (number of
# successes), p0_prey_pred (hypothesised probability of success).
dat <- read.table(
  header = TRUE,
  text = "km n_1 prey_pred p0_prey_pred
80 93 12 0.119
81 1541 103 0.0793
83 316 5 0.0364
84 721 44 0.0796
89 866 58 0.131 "
)