带统计测试的 dplyr
dplyr with stats test
我有以下数据设置
library(dplyr)
library(broom)
# Null-hypothesis success probability used by the binomial tests below.
pop.mean <- 0.10

# Simulated data: 1000 rows split evenly into four groups ("a".."d", 250 each).
# as.integer() truncates the uniform draws, so `trial` takes values 5..19 and
# `success` takes values 0..19. No seed is set in this snippet.
df <- data.frame(
  trial    = as.integer(runif(n = 1000, min = 5, max = 20)),
  success  = as.integer(runif(n = 1000, min = 0, max = 20)),
  my.group = factor(rep(letters[1:4], each = 250))
)
我想按 my.group 分组,并对每个组应用 binom.test
# Attempt from the question: group `df` by my.group and run one two-sided
# binom.test() per group against pop.mean, storing the result in list-column
# `test`.
# NOTE: this version is the one reported to fail with an error that `success`
# cannot be found. The columns are referenced by bare name inside do(); the
# working version accesses them through the `.` pronoun (`.$success`,
# `.$trial`).
bi.test <- df %>% group_by(my.group) %>%
do(test = binom.test(sum(success),
sum(trial),
pop.mean,
alternative = c("two.sided"),
conf.level = 0.95))
运行后收到错误消息:找不到对象 success。我在这里做错了什么?
我们需要在 do 中使用 $ 提取列(即写成 .$success、.$trial)
# Working version of the grouped test: inside do() the columns are pulled out
# of the `.` placeholder with `$`. One binom.test() result per my.group is
# stored in the list-column `test`.
res <- df %>%
  group_by(my.group) %>%
  do(
    test = binom.test(
      x = sum(.$success),
      n = sum(.$trial),
      p = pop.mean,
      alternative = "two.sided",
      conf.level = 0.95
    )
  )
如果我们使用 broom 包中的函数(tidy),那么
# Same per-group binomial test, but each result is passed through tidy() so
# the list-column `test` holds data frames instead of raw test objects.
res1 <- df %>%
  group_by(my.group) %>%
  do(
    test = tidy(
      binom.test(
        x = sum(.$success),
        n = sum(.$trial),
        p = pop.mean,
        alternative = "two.sided",
        conf.level = 0.95
      )
    )
  )

# Stack the tidied per-group results into one table and put the first column
# of res1 (the group label) in front of them.
bind_cols(res1[1], bind_rows(res1$test))
# A tibble: 4 x 9
# my.group estimate statistic p.value parameter conf.low conf.high method alternative
# <fctr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <fctr> <fctr>
#1 a 0.7908251 2310 0 2921 0.7756166 0.8054487 Exact binomial test two.sided
#2 b 0.7525138 2320 0 3083 0.7368831 0.7676640 Exact binomial test two.sided
#3 c 0.8446337 2479 0 2935 0.8310152 0.8575612 Exact binomial test two.sided
#4 d 0.7901683 2395 0 3031 0.7752305 0.8045438 Exact binomial test two.sided
注意:数据集是使用 24 的种子创建的,即 set.seed(24)
感谢@akrun
阅读您的回答后,我想到了使用 tidyr::nest 和 purrr::map 的解决方案。
# Alternative to do(): nest the rows of each group into a list-column `data`,
# map a tidied binom.test() over the nested data frames, then unnest the
# results so there is one row of test output per my.group.
res <- df %>%
  group_by(my.group) %>%
  tidyr::nest() %>%
  mutate(
    bi.test = purrr::map(
      data,
      # `group_df` is the nested data frame of one group; it is deliberately
      # not called `df`, to avoid shadowing the full data set.
      function(group_df) {
        broom::tidy(
          binom.test(
            sum(group_df$success),
            sum(group_df$trial),
            pop.mean,
            alternative = "two.sided",
            conf.level = 0.95
          )
        )
      }
    )
  ) %>%
  select(my.group, bi.test) %>%
  # Name the list-column explicitly: calling unnest() with no column is
  # deprecated in tidyr >= 1.0 and emits a warning.
  tidyr::unnest(bi.test)
我有以下数据设置
library(dplyr)
library(broom)
# Null-hypothesis success probability used by the binomial tests below.
pop.mean <- 0.10

# Simulated data: 1000 rows split evenly into four groups ("a".."d", 250 each).
# as.integer() truncates the uniform draws, so `trial` takes values 5..19 and
# `success` takes values 0..19. No seed is set in this snippet.
df <- data.frame(
  trial    = as.integer(runif(n = 1000, min = 5, max = 20)),
  success  = as.integer(runif(n = 1000, min = 0, max = 20)),
  my.group = factor(rep(letters[1:4], each = 250))
)
我想按 my.group 分组,并对每个组应用 binom.test
# Attempt from the question: group `df` by my.group and run one two-sided
# binom.test() per group against pop.mean, storing the result in list-column
# `test`.
# NOTE: this version is the one reported to fail with an error that `success`
# cannot be found. The columns are referenced by bare name inside do(); the
# working version accesses them through the `.` pronoun (`.$success`,
# `.$trial`).
bi.test <- df %>% group_by(my.group) %>%
do(test = binom.test(sum(success),
sum(trial),
pop.mean,
alternative = c("two.sided"),
conf.level = 0.95))
运行后收到错误消息:找不到对象 success。我在这里做错了什么?
我们需要在 do 中使用 $ 提取列(即写成 .$success、.$trial)
# Working version of the grouped test: inside do() the columns are pulled out
# of the `.` placeholder with `$`. One binom.test() result per my.group is
# stored in the list-column `test`.
res <- df %>%
  group_by(my.group) %>%
  do(
    test = binom.test(
      x = sum(.$success),
      n = sum(.$trial),
      p = pop.mean,
      alternative = "two.sided",
      conf.level = 0.95
    )
  )
如果我们使用 broom 包中的函数(tidy),那么
# Same per-group binomial test, but each result is passed through tidy() so
# the list-column `test` holds data frames instead of raw test objects.
res1 <- df %>%
  group_by(my.group) %>%
  do(
    test = tidy(
      binom.test(
        x = sum(.$success),
        n = sum(.$trial),
        p = pop.mean,
        alternative = "two.sided",
        conf.level = 0.95
      )
    )
  )

# Stack the tidied per-group results into one table and put the first column
# of res1 (the group label) in front of them.
bind_cols(res1[1], bind_rows(res1$test))
# A tibble: 4 x 9
# my.group estimate statistic p.value parameter conf.low conf.high method alternative
# <fctr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <fctr> <fctr>
#1 a 0.7908251 2310 0 2921 0.7756166 0.8054487 Exact binomial test two.sided
#2 b 0.7525138 2320 0 3083 0.7368831 0.7676640 Exact binomial test two.sided
#3 c 0.8446337 2479 0 2935 0.8310152 0.8575612 Exact binomial test two.sided
#4 d 0.7901683 2395 0 3031 0.7752305 0.8045438 Exact binomial test two.sided
注意:数据集是使用 24 的种子创建的,即 set.seed(24)
感谢@akrun
阅读您的回答后,我想到了使用 tidyr::nest 和 purrr::map 的解决方案。
# Alternative to do(): nest the rows of each group into a list-column `data`,
# map a tidied binom.test() over the nested data frames, then unnest the
# results so there is one row of test output per my.group.
res <- df %>%
  group_by(my.group) %>%
  tidyr::nest() %>%
  mutate(
    bi.test = purrr::map(
      data,
      # `group_df` is the nested data frame of one group; it is deliberately
      # not called `df`, to avoid shadowing the full data set.
      function(group_df) {
        broom::tidy(
          binom.test(
            sum(group_df$success),
            sum(group_df$trial),
            pop.mean,
            alternative = "two.sided",
            conf.level = 0.95
          )
        )
      }
    )
  ) %>%
  select(my.group, bi.test) %>%
  # Name the list-column explicitly: calling unnest() with no column is
  # deprecated in tidyr >= 1.0 and emits a warning.
  tidyr::unnest(bi.test)