在 dplyr 中使用统计检验

dplyr with stats test

我有以下数据设置

library(dplyr)
library(broom)

# Hypothesised success probability that every group is tested against.
pop.mean <- 0.10

# Fix the RNG seed so the simulated data (and every result derived from it)
# is identical from run to run.
set.seed(24)

# Simulated data: 1000 rows in four equally sized groups ("a".."d", 250 each).
# as.integer() truncates the uniform draws, so `trial` falls in 5..19 and
# `success` in 0..19.
df <- data.frame(
  trial    = as.integer(runif(1000, min = 5, max = 20)),
  success  = as.integer(runif(1000, min = 0, max = 20)),
  my.group = factor(rep(c("a", "b", "c", "d"), each = 250))
)

我想按 my.group 分组,并对每个分组应用 binom.test

# Attempt to run one binomial test per group of `my.group`.
# NOTE: this call fails with an "object not found" error for `success`:
# inside do() the columns are not available as bare names and have to be
# reached through the `.` placeholder instead (e.g. `.$success`).
bi.test <- df %>% group_by(my.group) %>%
  do(test = binom.test(sum(success),
                       sum(trial),
                       pop.mean,
                       alternative = c("two.sided"),
                       conf.level = 0.95))

但我收到错误消息,提示找不到对象 success。我在这里做错了什么?

我们需要在 do 中使用 $ 提取列(即写成 .$列名):
# Run one binomial test per group. Inside do() the rows of the current group
# are exposed through the `.` placeholder, so columns are read as `.$column`.
# Each group's test object is stored under the name `test`.
res <- df %>%
  group_by(my.group) %>%
  do(
    test = binom.test(
      x = sum(.$success),  # successes pooled over the group
      n = sum(.$trial),    # trials pooled over the group
      p = pop.mean,
      alternative = "two.sided",
      conf.level = 0.95
    )
  )

如果我们使用 broom 包中的 tidy 函数,那么

# Same per-group binomial test, but each result is passed through tidy()
# before being stored in the `test` list-column.
res1 <- df %>%
  group_by(my.group) %>%
  do(
    test = tidy(
      binom.test(
        x = sum(.$success),
        n = sum(.$trial),
        p = pop.mean,
        alternative = "two.sided",
        conf.level = 0.95
      )
    )
  )

# Stack the tidied results row-wise and attach the group label (the first
# column of `res1`) in front of them.
bind_cols(res1[1], bind_rows(res1$test))
# A tibble: 4 x 9
#  my.group  estimate statistic p.value parameter  conf.low conf.high              method alternative
#    <fctr>     <dbl>     <dbl>   <dbl>     <dbl>     <dbl>     <dbl>              <fctr>      <fctr>
#1        a 0.7908251      2310       0      2921 0.7756166 0.8054487 Exact binomial test   two.sided
#2        b 0.7525138      2320       0      3083 0.7368831 0.7676640 Exact binomial test   two.sided
#3        c 0.8446337      2479       0      2935 0.8310152 0.8575612 Exact binomial test   two.sided
#4        d 0.7901683      2395       0      3031 0.7752305 0.8045438 Exact binomial test   two.sided

注意:数据集是使用 24 的种子创建的,即 set.seed(24)

感谢@akrun

阅读您的回答后,我想到了使用 tidyr::nest 和 purrr::map 的解决方案。

# Alternative without do(): nest each group's rows into the `data`
# list-column, run and tidy the binomial test for every nested data frame,
# then unnest the tidied results into ordinary columns.
res <- df %>%
  group_by(my.group) %>%
  tidyr::nest() %>%
  mutate(
    # The lambda argument is deliberately not called `df`, so it does not
    # shadow the global data frame of the same name.
    bi.test = purrr::map(data, function(group_data) {
      broom::tidy(
        binom.test(
          sum(group_data$success),
          sum(group_data$trial),
          pop.mean,
          alternative = "two.sided",
          conf.level = 0.95
        )
      )
    })
  ) %>%
  select(my.group, bi.test) %>%
  # Name the column explicitly: calling unnest() without specifying columns
  # is deprecated in tidyr >= 1.0.0 and emits a warning.
  tidyr::unnest(bi.test)