如何简化数据框中多个组的重复操作?

How to simplify repetitive actions for multiple groups in a data frame?

我正在尝试使用循环或 apply 系列函数来简化这些操作。

我无法共享原始文件,但这里有一份可用于复现问题的演示数据。

# Demo data for reproducing the problem. The columns are the ones the code
# below reads: trno, toxdeg, duration.month and status.
ae.rmst <- data.frame(
  trno = c(1, 1, 1, 0, 0, 0),
  toxdeg = c(0, 2, 3, 1, 0, 2),
  duration.month = c(4.2, 3.2, 1.2, 5, 9.1, 1.1),
  status = c(1, 1, 1, 1, 1, 1)
)

在我的代码中,我正在尝试执行以下操作:

  1. 对每个数据框执行 rmst2 函数
  2. 从摘要中提取值
  3. 将提取的值放入 table

这是我的代码:

# Repetitive per-grade workflow that the question asks to simplify:
# for each toxdeg value 0..3, subset ae.rmst, run rmst2() on the subset,
# then collect selected pieces of the four results into one table.
# NOTE(review): rmst2() is not defined in this snippet; presumably it is
# survRM2::rmst2, and %>% / filter / rename / mutate / select presumably
# come from dplyr -- confirm which packages are attached.
ae.df0 = ae.rmst %>% filter(toxdeg == "0")
    rmst0 = rmst2(ae.df0$duration.month, ae.df0$status, ae.df0$trno, covariates = NULL, alpha = 0.05)
    
    ae.df1 = ae.rmst %>% filter(toxdeg == "1")
    rmst1 = rmst2(ae.df1$duration.month, ae.df1$status, ae.df1$trno, covariates = NULL, alpha = 0.05)
    
    ae.df2 = ae.rmst %>% filter(toxdeg == "2")
    # NOTE: the result is stored under the name `rmst2`, the same name as the
    # function being called. The rmst2(...) call further down still resolves
    # to the function, because R skips non-function bindings when it looks up
    # the name in a call, but the shadowing is easy to misread.
    rmst2 = rmst2(ae.df2$duration.month, ae.df2$status, ae.df2$trno, covariates = NULL, alpha = 0.05)
    
    ae.df3 = ae.rmst %>% filter(toxdeg == "3")
    rmst3 = rmst2(ae.df3$duration.month, ae.df3$status, ae.df3$trno, covariates = NULL, alpha = 0.05)
    
    
    # Element 10 of each fit's unadjusted.result, listed twice (grades 0-3,
    # then grades 0-3 again) so that after t() there are 8 rows, matching the
    # 8 rows of rmst.table built below. The single column is renamed "p value".
    # NOTE(review): presumably [10] is the p-value cell of the between-arm
    # comparison -- confirm against the layout of the rmst2 result.
    rmst_pvalue = data.frame(rmst0$unadjusted.result[10],
                             rmst1$unadjusted.result[10],
                             rmst2$unadjusted.result[10],
                             rmst3$unadjusted.result[10],
                             
                             rmst0$unadjusted.result[10],
                             rmst1$unadjusted.result[10],
                             rmst2$unadjusted.result[10],
                             rmst3$unadjusted.result[10]
    ) %>%
      t() %>%
      as.data.frame() %>%
      rename("p value" = V1)
    
    # Per-arm "rmst" entries: RMST.arm0 for grades 0-3 first, then RMST.arm1
    # for grades 0-3. After t() each entry is one row; the p values are bound
    # on column-wise, and the Arm / toxdeg labels are attached in that same
    # row order (Arm 1 = RMST.arm0 rows, Arm 2 = RMST.arm1 rows).
    # The final select() keeps only the listed columns, and rename() gives
    # toxdeg and Est. their display names.
    rmst.table = data.frame(
      rmst0$RMST.arm0["rmst"],
      rmst1$RMST.arm0["rmst"],
      rmst2$RMST.arm0["rmst"],
      rmst3$RMST.arm0["rmst"],
      
      rmst0$RMST.arm1["rmst"],
      rmst1$RMST.arm1["rmst"],
      rmst2$RMST.arm1["rmst"],
      rmst3$RMST.arm1["rmst"]) %>%
      t() %>%
      cbind(rmst_pvalue) %>%
      as.data.frame() %>%
      mutate(Arm = c(1,1,1,1,2,2,2,2),
             Arm = as.integer(Arm),
             toxdeg = c(0,1,2,3,0,1,2,3),
             toxdeg = as.integer(toxdeg)) %>%
      select(Arm, toxdeg, Est., "lower .95", "upper .95", "p value") %>%
      rename("Toxicity Degree" = toxdeg,
             Estimates = Est.)

可以使用 `by` 函数按 toxdeg 分组,对每组执行同样的操作。

# Fit rmst2 once per toxdeg group and stack the per-arm summaries.
# by() hands each group's rows to the function; every call returns a 2-row
# matrix (one row per arm) and rbind.data.frame glues the groups together.
res <- do.call(
  rbind.data.frame,
  by(ae.rmst, ae.rmst$toxdeg, \(grp) {
    fit <- survRM2::rmst2(
      grp$duration.month, grp$status, grp$trno,
      covariates = NULL, alpha = .05
    )
    stat_names <- c("estimate", "se", "lower_95", "upper_95")
    # Row "arm1" holds RMST.arm0 and row "arm2" holds RMST.arm1.
    arm_stats <- rbind(
      arm1 = setNames(fit$RMST.arm0[["rmst"]], stat_names),
      arm2 = setNames(fit$RMST.arm1[["rmst"]], stat_names)
    )
    cbind(
      arm = 1:2,
      # All rows in a group share one toxdeg, so the first value labels it.
      tox_degree = grp$toxdeg[[1L]],
      arm_stats,
      pval = fit$unadjusted.result[1, 4]
    )
  })
)

res[order(res$arm), ]
#        arm tox_degree  estimate se lower_95 upper_95 pval
# 0.arm1   1          0       9.1  0      9.1      9.1    0
# 1.arm1   1          1       5.0  0      5.0      5.0    0
# 2.arm1   1          2       1.1  0      1.1      1.1    0
# 3.arm1   1          3       1.9  0      1.9      1.9    0
# 0.arm2   2          0       4.2  0      4.2      4.2    0
# 1.arm2   2          1       3.2  0      3.2      3.2    0
# 2.arm2   2          2       3.1  0      3.1      3.1    0
# 3.arm2   2          3       1.2  0      1.2      1.2    0

注意: 上述代码需要 R >= 4.1。


数据:

# Example data behind the output shown above: two arms (trno 1, then 0),
# each with one row per toxdeg 0-3; every status is 1.
ae.rmst <- data.frame(
  trno = rep(c(1L, 0L), each = 4L),
  toxdeg = rep(0:3, times = 2L),
  duration.month = c(4.2, 3.2, 3.1, 1.2, 9.1, 5, 1.1, 1.9),
  status = rep(1L, 8L)
)