如何使用 R 包 srvyr 执行多个 t.tests?

How to perform multiple t.tests with R package srvyr?

作为对最近一个 SO 问题的跟进(参见 ),我想知道如何使用加权数据(包 srvyrR 中执行多个 t.tests ).我做不到 运行,如果有人能在这里帮助我,我会很高兴。我在下面的代码中添加了一个随机样本。 非常感谢!

      #create data
      surveydata <- as.data.frame(replicate(1,sample(1:5,1000,rep=TRUE)))
      colnames(surveydata)[1] <- "q1"
      surveydata$q2 <- sample(6, size = nrow(surveydata), replace = TRUE)
      surveydata$q3 <- sample(6, size = nrow(surveydata), replace = TRUE)
      surveydata$q4 <- sample(6, size = nrow(surveydata), replace = TRUE)
      surveydata$group <- c(1,2)
      
      #replace all value "6" wir NA
      surveydata[surveydata == 6] <- NA
      
      #add NAs to group 1 in q1
      surveydata$q1[which(surveydata$q1==1 & surveydata$group==1)] = NA
      surveydata$q1[which(surveydata$q1==2 & surveydata$group==1)] = NA
      surveydata$q1[which(surveydata$q1==3 & surveydata$group==1)] = NA
      surveydata$q1[which(surveydata$q1==4 & surveydata$group==1)] = NA
      surveydata$q1[which(surveydata$q1==5 & surveydata$group==1)] = NA
      
      #add weights
      surveydata$weights <- round(runif(nrow(surveydata), min=0.2, max=1.5), 3)

      #create vector for relevant questions
                rquest <- names(surveydata)[1:4]


              
      # create survey design
                library(srvyr)
                surveydesign <- surveydata %>%
                          as_survey_design(strata = group, weights = weights, variables = c("group", all_of(rquest)))
      


      # perform multiple t.test (doesn't work yet)
                outcome <- do.call(rbind, lapply(names(surveydesign$variables)[-1], function(i) {
                          tryCatch({
                                    test <- t.test(as.formula(paste(i, "~ survey")), data = surveydesign)
                                    data.frame(question = i, 
                                               group1 = test$estimate[1], 
                                               group2 = test$estimate[2], 
                                               difference = diff(test$estimate),
                                               p_value = test$p.value, row.names = 1)
                          }, error = function(e) {
                                    data.frame(question = i, 
                                               group1 = NA,
                                               group2 = NA, 
                                               difference = NA,
                                               p_value = NA, row.names = 1)
                          })
                }))
                
                                  

                     
                                  

据我了解,您在示例 q1 至 q4 中有一系列问题列。您已使用 srvyr 生成 weights 列。在我们的数据中,对于一个特定的问题,整个小组可能都是 NA 并且您希望将结果生成到 df 中,即使这是真的。您想要 weighted Student's t-test 使用 weights 列而不是简单的 t-test。我发现唯一提供该功能的函数是 weights::wtd.t.test,它不提供公式界面,但希望输入向量。

按照采取的步骤顺序:

  1. 加载必要的库
library(srvyr)
library(dplyr)
library(rlang)
library(weights)
  1. 构建一个自定义函数,通过变量删除 NAs,pulls xyweightxweighty,运行测试,并将您想要的信息提取到 df 行中。
multiple_wt_ttest <- function(i) {
   i <- ensym(i)
   xxx <- surveydata %>% 
      filter(!is.na(!!i)) %>% 
      split(.$group)
   newx <- pull(xxx[[1]], i)
   newy <- pull(xxx[[2]], i)
   wtx <- pull(xxx[[1]], weights)
   wty <- pull(xxx[[2]], weights)
   test <- wtd.t.test(x = newx, 
                      y = newy, 
                      weight = wtx, 
                      weighty = wty, 
                      samedata = FALSE)
   data.frame(question = rlang::as_name(i), 
              group1 = test$additional[[2]],
              group2 = test$additional[[3]], 
              difference = test$additional[[1]],
              p.value = test$coefficients[[3]])
}
  1. 一旦我们有了函数,我们就可以使用 lapply 逐列应用它(注意它处理 q2 中的情况,其中 group == 1 全部是 NA
lapply(names(surveydata)[1:4], multiple_wt_ttest)
#> [[1]]
#>   question group1   group2 difference p.value
#> 1       q1    NaN 3.010457        NaN      NA
#> 
#> [[2]]
#>   question   group1   group2  difference  p.value
#> 1       q2 3.009003 3.071842 -0.06283922 0.515789
#> 
#> [[3]]
#>   question   group1   group2 difference   p.value
#> 1       q3 2.985096 2.968867  0.0162288 0.8734034
#> 
#> [[4]]
#>   question   group1   group2 difference    p.value
#> 1       q4 2.856255 3.047787 -0.1915322 0.04290471
  1. 最后,用do.callrbind包起来,做成你想要的df
do.call(rbind, lapply(names(surveydata)[1:4], multiple_wt_ttest))
#>   question   group1   group2  difference    p.value
#> 1       q1      NaN 3.010457         NaN         NA
#> 2       q2 3.009003 3.071842 -0.06283922 0.51578905
#> 3       q3 2.985096 2.968867  0.01622880 0.87340343
#> 4       q4 2.856255 3.047787 -0.19153218 0.04290471

您的数据(没有显示创建它的所有旋转和标题前 200 行)

surveydata <- 
structure(list(q1 = c(NA, 1L, NA, 4L, NA, 5L, NA, 3L, NA, 5L, 
NA, 5L, NA, 1L, NA, 5L, NA, 3L, NA, 4L, NA, 5L, NA, 4L, NA, 2L, 
NA, 5L, NA, 2L, NA, 2L, NA, 2L, NA, 2L, NA, 2L, NA, 2L, NA, 5L, 
NA, 4L, NA, 4L, NA, 3L, NA, 4L, NA, 2L, NA, 4L, NA, 3L, NA, 1L, 
NA, 1L, NA, 3L, NA, 5L, NA, 3L, NA, 5L, NA, 5L, NA, 4L, NA, 2L, 
NA, 5L, NA, 1L, NA, 3L, NA, 2L, NA, 5L, NA, 4L, NA, 1L, NA, 5L, 
NA, 2L, NA, 2L, NA, 4L, NA, 1L, NA, 3L, NA, 4L, NA, 5L, NA, 3L, 
NA, 5L, NA, 1L, NA, 1L, NA, 3L, NA, 2L, NA, 4L, NA, 4L, NA, 1L, 
NA, 4L, NA, 3L, NA, 2L, NA, 3L, NA, 5L, NA, 2L, NA, 5L, NA, 2L, 
NA, 1L, NA, 5L, NA, 2L, NA, 1L, NA, 2L, NA, 3L, NA, 2L, NA, 3L, 
NA, 4L, NA, 4L, NA, 3L, NA, 1L, NA, 3L, NA, 1L, NA, 5L, NA, 3L, 
NA, 5L, NA, 4L, NA, 1L, NA, 4L, NA, 1L, NA, 3L, NA, 1L, NA, 4L, 
NA, 5L, NA, 4L, NA, 4L, NA, 3L, NA, 3L, NA, 2L, NA, 1L), q2 = c(NA, 
2L, 2L, 1L, 5L, 4L, 3L, 2L, 4L, 4L, 5L, 1L, 4L, 5L, 1L, 4L, NA, 
2L, 2L, 5L, 5L, 4L, 5L, 4L, NA, 1L, 3L, 4L, 5L, 2L, NA, 5L, 2L, 
NA, 4L, 4L, 5L, 4L, 1L, NA, 5L, 1L, 4L, 2L, 1L, NA, 5L, 1L, 3L, 
2L, 4L, NA, 2L, NA, 1L, 4L, NA, 2L, 3L, NA, 3L, 1L, 1L, 1L, 1L, 
1L, 4L, 5L, 1L, 4L, 5L, 4L, NA, 2L, 3L, 2L, 2L, 2L, 4L, 2L, 3L, 
5L, NA, 2L, NA, NA, 5L, 2L, 3L, 2L, 1L, 5L, 3L, 2L, 1L, 2L, NA, 
1L, 3L, 5L, 5L, 1L, 1L, NA, 3L, 3L, 1L, 2L, 3L, 3L, 2L, 4L, 2L, 
5L, 4L, 3L, 1L, NA, 4L, 3L, 1L, 5L, 5L, 5L, 2L, 2L, 4L, 5L, 4L, 
1L, 3L, NA, 1L, 3L, 5L, 2L, 1L, 3L, 3L, NA, NA, 5L, NA, 5L, 2L, 
5L, 2L, NA, NA, NA, 1L, 4L, 3L, 2L, 3L, 1L, 3L, 5L, 1L, 2L, 3L, 
5L, 4L, 4L, NA, NA, 5L, 2L, 3L, 3L, 2L, 2L, 1L, 3L, 1L, 4L, 5L, 
2L, 5L, 3L, 1L, 5L, NA, 4L, 3L, 5L, 1L, 1L, 3L, 4L, 4L, 1L, 4L, 
3L, 3L, NA, 2L, 3L, 5L, 5L), q3 = c(4L, 4L, 1L, NA, 4L, 5L, 1L, 
3L, 4L, 4L, 1L, 3L, 2L, 1L, 2L, 4L, 2L, 3L, 4L, 4L, 1L, 3L, 4L, 
5L, 5L, 1L, 3L, 5L, 1L, 2L, 1L, 5L, 5L, 3L, 1L, 3L, 1L, 5L, 1L, 
3L, NA, NA, 3L, 5L, NA, 2L, 2L, 1L, 1L, 3L, 5L, 5L, 2L, NA, 5L, 
2L, 3L, NA, NA, 3L, 2L, 5L, 2L, 1L, NA, NA, 4L, 2L, NA, 1L, NA, 
NA, 5L, 3L, 5L, 4L, 2L, 4L, NA, 2L, 4L, 5L, NA, 2L, 1L, 3L, NA, 
5L, 5L, 4L, 5L, 1L, 5L, 4L, 5L, 3L, 2L, 2L, 2L, 1L, 2L, 1L, NA, 
NA, 5L, 1L, 2L, 5L, 5L, 5L, 3L, 3L, 3L, 2L, 4L, NA, 3L, NA, 3L, 
4L, 2L, 2L, 5L, 1L, NA, 1L, NA, 2L, 2L, 3L, 2L, 5L, 1L, 4L, 4L, 
3L, 5L, 5L, NA, NA, 4L, NA, 5L, 1L, 1L, 2L, 5L, 4L, 5L, 4L, 1L, 
1L, NA, 4L, 4L, 4L, 5L, 1L, NA, 2L, 3L, NA, 1L, NA, NA, NA, 4L, 
2L, 4L, 2L, 1L, 1L, 2L, 1L, 5L, 1L, 3L, 3L, 4L, NA, 1L, 1L, 1L, 
3L, 5L, 1L, NA, 3L, 5L, 5L, 4L, NA, 1L, 4L, 5L, 3L, 5L, NA, 1L, 
4L), q4 = c(NA, 3L, 1L, 1L, 2L, NA, 1L, 5L, 1L, 3L, 3L, 1L, 3L, 
5L, 1L, 3L, 2L, 1L, 1L, 3L, 5L, 5L, NA, 5L, 5L, 5L, 4L, 4L, 4L, 
3L, 3L, 2L, 1L, 3L, 5L, 3L, 1L, 5L, NA, 3L, 2L, 5L, 4L, 4L, 4L, 
2L, 1L, 1L, 2L, 5L, 2L, 1L, 3L, 4L, 3L, 1L, 1L, 4L, 4L, 1L, 2L, 
3L, 3L, 4L, NA, 3L, 3L, 2L, 2L, NA, 3L, 5L, 4L, 4L, 3L, 3L, 4L, 
NA, NA, 3L, NA, 1L, NA, 3L, 3L, 3L, 2L, 3L, 3L, 4L, 1L, 1L, 2L, 
5L, 1L, 1L, 5L, 2L, 2L, 2L, 3L, 4L, 5L, 3L, NA, NA, 2L, 2L, 3L, 
2L, 3L, 2L, 3L, 1L, 3L, 3L, 4L, 5L, NA, 4L, 4L, 3L, 1L, 4L, 5L, 
4L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 5L, 1L, 5L, 2L, NA, 4L, 2L, 
1L, 3L, 3L, 4L, 3L, 2L, 4L, 5L, 4L, 2L, 3L, 5L, 1L, NA, 3L, 2L, 
5L, NA, 1L, 2L, 4L, 5L, 2L, NA, 1L, 3L, NA, 3L, 3L, 3L, 5L, 4L, 
5L, 3L, 3L, NA, 4L, 2L, 3L, 2L, 5L, 4L, 4L, 5L, 5L, 3L, 2L, NA, 
4L, 1L, 5L, 2L, 4L, 3L, 4L, NA, 3L, 1L, 3L), group = structure(c(1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("1", "2"), class = "factor"), 
    weights = c(1.445, 0.408, 0.621, 0.961, 1.492, 0.625, 1.131, 
    0.246, 0.612, 0.621, 1.311, 0.649, 1.282, 0.898, 1.268, 0.641, 
    0.764, 0.759, 0.306, 0.707, 0.899, 0.785, 1.279, 0.458, 0.882, 
    0.384, 1.492, 0.468, 0.785, 0.707, 0.489, 1.113, 0.692, 0.293, 
    0.642, 1.327, 0.362, 1.405, 1.173, 0.732, 0.661, 0.522, 1.001, 
    0.374, 1.181, 0.819, 1.389, 0.43, 0.477, 0.879, 0.634, 0.417, 
    0.359, 1.007, 0.866, 0.203, 1.469, 0.294, 1.326, 1.391, 0.871, 
    1.036, 1.251, 0.417, 1.074, 1.268, 0.963, 0.469, 0.215, 1.074, 
    0.644, 1.054, 0.787, 0.714, 0.568, 0.397, 1.421, 0.692, 0.262, 
    0.644, 0.793, 0.808, 0.25, 0.842, 1.039, 0.359, 0.987, 1.257, 
    0.301, 0.203, 0.823, 1.328, 1.192, 0.256, 1.099, 0.668, 1.129, 
    0.413, 0.266, 1.121, 0.893, 1.484, 0.568, 1.255, 0.531, 0.461, 
    0.773, 0.298, 0.233, 0.676, 0.478, 0.806, 0.556, 0.201, 0.801, 
    0.348, 1.396, 0.552, 0.384, 0.615, 0.499, 0.819, 0.954, 0.943, 
    0.956, 0.323, 0.706, 0.699, 0.9, 1.156, 1.436, 1.115, 0.762, 
    0.258, 1.421, 0.644, 1.349, 0.251, 0.735, 0.479, 1.055, 1.395, 
    1.062, 1.155, 0.869, 0.436, 0.415, 0.745, 1.247, 0.21, 0.879, 
    0.776, 0.747, 0.835, 0.609, 0.733, 0.563, 1.067, 1.436, 0.679, 
    1.497, 1.385, 1.087, 1.286, 0.503, 0.738, 0.504, 0.665, 1.421, 
    1.288, 0.691, 0.972, 0.467, 0.425, 0.406, 0.862, 0.749, 0.935, 
    0.291, 0.444, 1.118, 1.048, 0.886, 0.982, 0.578, 1.402, 0.778, 
    1.139, 0.804, 0.618, 1.147, 0.594, 0.984, 0.986, 0.941, 0.794, 
    0.323, 1.41, 0.902, 0.417)), row.names = c(NA, 200L), class = "data.frame")