函数嵌套时如何在三个不同的DF上使用lapply或申请bootstrapping或KM计算?

How do I use lapply or sapply for boostraping or KM calculations on three different DFs when functions are nested?

我想 运行 bootstrap (k=10,000) 和 Kaplan-Meier 使用 lapply 或对三个不同数据框的数字列进行计算当函数嵌套时应用。

我定义了三个函数作为自举命令中的参数。 一个函数 returns 一组预定义的分位数,另一个是中位数,第三个是中位数的 95% 置信区间。引导完全失败。 bootstrapping 读取的错误是 "x[ "Result", drop = FALSE] 中的错误:维数不正确"

KM 仅针对列表中的第一个数据帧 (df) 完成。

我正在使用 bootNADA2 库进行计算。

下面是数据、函数和命令的 REPREX:

library(boot)
library(wakefield)

#Generate three random datasets as data frames for example

#df
Result <- rnorm(1000,10,1)
Cens <- r_sample_logical(1000, name = "Cens")  
df <- data.frame(Result,Cens)

#df1
Result <- rnorm(1000,10,1)
Cens <- r_sample_logical(1000, name = "Cens")  
df1 <- data.frame(Result,Cens)

#df2
Result <- rnorm(1000,10,1)
Cens <- r_sample_logical(1000, name = "Cens")  
df2 <- data.frame(Result,Cens)

x <- c(df, df1, df2)

#I know I can use built-in R base functions in the following way:

lapply(x, quantile)
sapply(x, quantile)

#How do I use lapply and sapply when funtions are nested like in the example below?

#Bootstrap
medianfun <- function(x, i){
  d <- x[i, ]
  return(median(d))   
}
meanfun <- function(x, i){
  d <- x[i, ]
  return(mean(d))   
}
quantfun <- function(x, i){
  d <- x[i, ]
  return(quantile(d, c(0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,0.95)))   
}

set.seed(1234)
bo.quant<- boot(x[, "Result", drop = FALSE], statistic=quantfun, R=10000)
bo.quant
bo.median <- boot(x[, "Result", drop = FALSE], statistic=medianfun, R=10000)
bo.median
bo.95ci.df <- boot.ci(bo.median, conf=0.95, type="bca")
bo.95ci.df

#Kaplan Meier
library(NADA2)
tddt.km = cfit(x$Result,x$Censored, qtls = c(.1,.2,.3,.4,.5,.8,.845,.85,.90,.95))
tddt.km

首先你应该使用 list 而不是 c 并给数据框起这样的名字:

x <- list(df=df, df1=df1, df2=df2)

然后调整您的函数以适应列表:

#Bootstrap
medianfun <- function(x, i) {
  d <- x[i]
  return(median(d))   
}
meanfun <- function(x, i) {
  d <- x[i]
  return(mean(d))   
}
quantfun <- function(x, i) {
  d <- x[i]
  return(quantile(d, c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95)))   
}

这使您可以使用 lapply 在列表中获取结果:

set.seed(1234)
R <- 199
bo.quant <- lapply(x, function(z) boot(z$Result, statistic=quantfun, R=R))
bo.quant$df  ## access list `df`. `df1` and `df2` accordingly
# ORDINARY NONPARAMETRIC BOOTSTRAP
# 
# 
# Call:
#   boot(data = z$Result, statistic = quantfun, R = R)
# 
# 
# Bootstrap Statistics :
#   original        bias    std. error
# t1*   8.710887 -1.294426e-02  0.04987613
# t2*   9.188977 -2.396393e-02  0.06190039
# t3*   9.454297  8.847132e-03  0.03356391
# t4*   9.717689  9.789525e-03  0.04214934
# t5*   9.998288 -7.193749e-04  0.04270117
# t6*  10.259694 -1.568850e-04  0.03849390
# t7*  10.558218  4.143536e-03  0.05029767
# t8*  10.876818  7.381331e-03  0.04242551
# t9*  11.297995 -2.533015e-03  0.05898180
# t10* 11.706104  7.215891e-05  0.08313804

bo.median <- lapply(x, function(z) boot(z$Result, statistic=medianfun, R=R))
bo.median$df

bo.95ci.df <- boot.ci(bo.median, conf=0.95, type="bca")
bo.95ci.df
# ORDINARY NONPARAMETRIC BOOTSTRAP
# 
# 
# Call:
#   boot(data = z$Result, statistic = medianfun, R = R)
# 
# 
# Bootstrap Statistics :
#   original       bias    std. error
# t1* 9.998288 -0.005605121  0.04629431

#Kaplan Meier
library(NADA2)
tddt.km <- lapply(x, function(z) 
  cfit(z$Result, z$Cens, qtls=c(.1, .2, .3, .4, .5, .8, .845, .85, .90, .95),
  Cdf=FALSE, printstat=FALSE))
tddt.km$df
# N PctND Conf KMmean  KMsd KMmedian LCLmean UCLmean LCLmedian UCLmedian   Q10   Q20    Q30   Q40    Q50    Q80  Q84.5    Q85    Q90    Q95
# 1000  49.9   95  9.321 1.225   <9.365   9.294   9.348     9.257     9.442 7.633 8.207 <8.652 8.997 <9.365 <10.42 <10.64 <10.67 <10.94 <11.31