函数嵌套时如何在三个不同的DF上使用lapply或申请bootstrapping或KM计算?
How do I use lapply or sapply for boostraping or KM calculations on three different DFs when functions are nested?
我想 运行 bootstrap (k=10,000) 和 Kaplan-Meier 使用 lapply 或对三个不同数据框的数字列进行计算当函数嵌套时应用。
我定义了三个函数作为自举命令中的参数。
一个函数 returns 一组预定义的分位数,另一个是中位数,第三个是中位数的 95% 置信区间。引导完全失败。 bootstrapping 读取的错误是 "x[ "Result", drop = FALSE] 中的错误:维数不正确"
KM 仅针对列表中的第一个数据帧 (df) 完成。
我正在使用 boot 和 NADA2 库进行计算。
下面是数据、函数和命令的 REPREX:
library(boot)
library(wakefield)
#Generate three random datasets as data frames for example
#df
Result <- rnorm(1000,10,1)
Cens <- r_sample_logical(1000, name = "Cens")
df <- data.frame(Result,Cens)
#df1
Result <- rnorm(1000,10,1)
Cens <- r_sample_logical(1000, name = "Cens")
df1 <- data.frame(Result,Cens)
#df2
Result <- rnorm(1000,10,1)
Cens <- r_sample_logical(1000, name = "Cens")
df2 <- data.frame(Result,Cens)
x <- c(df, df1, df2)
#I know I can use built-in R base functions in the following way:
lapply(x, quantile)
sapply(x, quantile)
#How do I use lapply and sapply when funtions are nested like in the example below?
#Bootstrap
medianfun <- function(x, i){
d <- x[i, ]
return(median(d))
}
meanfun <- function(x, i){
d <- x[i, ]
return(mean(d))
}
quantfun <- function(x, i){
d <- x[i, ]
return(quantile(d, c(0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,0.95)))
}
set.seed(1234)
bo.quant<- boot(x[, "Result", drop = FALSE], statistic=quantfun, R=10000)
bo.quant
bo.median <- boot(x[, "Result", drop = FALSE], statistic=medianfun, R=10000)
bo.median
bo.95ci.df <- boot.ci(bo.median, conf=0.95, type="bca")
bo.95ci.df
#Kaplan Meier
library(NADA2)
tddt.km = cfit(x$Result,x$Censored, qtls = c(.1,.2,.3,.4,.5,.8,.845,.85,.90,.95))
tddt.km
首先你应该使用 list
而不是 c
并给数据框起这样的名字:
x <- list(df=df, df1=df1, df2=df2)
然后调整您的函数以适应列表:
#Bootstrap
medianfun <- function(x, i) {
d <- x[i]
return(median(d))
}
meanfun <- function(x, i) {
d <- x[i]
return(mean(d))
}
quantfun <- function(x, i) {
d <- x[i]
return(quantile(d, c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95)))
}
这使您可以使用 lapply
在列表中获取结果:
set.seed(1234)
R <- 199
bo.quant <- lapply(x, function(z) boot(z$Result, statistic=quantfun, R=R))
bo.quant$df ## access list `df`. `df1` and `df2` accordingly
# ORDINARY NONPARAMETRIC BOOTSTRAP
#
#
# Call:
# boot(data = z$Result, statistic = quantfun, R = R)
#
#
# Bootstrap Statistics :
# original bias std. error
# t1* 8.710887 -1.294426e-02 0.04987613
# t2* 9.188977 -2.396393e-02 0.06190039
# t3* 9.454297 8.847132e-03 0.03356391
# t4* 9.717689 9.789525e-03 0.04214934
# t5* 9.998288 -7.193749e-04 0.04270117
# t6* 10.259694 -1.568850e-04 0.03849390
# t7* 10.558218 4.143536e-03 0.05029767
# t8* 10.876818 7.381331e-03 0.04242551
# t9* 11.297995 -2.533015e-03 0.05898180
# t10* 11.706104 7.215891e-05 0.08313804
bo.median <- lapply(x, function(z) boot(z$Result, statistic=medianfun, R=R))
bo.median$df
bo.95ci.df <- boot.ci(bo.median, conf=0.95, type="bca")
bo.95ci.df
# ORDINARY NONPARAMETRIC BOOTSTRAP
#
#
# Call:
# boot(data = z$Result, statistic = medianfun, R = R)
#
#
# Bootstrap Statistics :
# original bias std. error
# t1* 9.998288 -0.005605121 0.04629431
#Kaplan Meier
library(NADA2)
tddt.km <- lapply(x, function(z)
cfit(z$Result, z$Cens, qtls=c(.1, .2, .3, .4, .5, .8, .845, .85, .90, .95),
Cdf=FALSE, printstat=FALSE))
tddt.km$df
# N PctND Conf KMmean KMsd KMmedian LCLmean UCLmean LCLmedian UCLmedian Q10 Q20 Q30 Q40 Q50 Q80 Q84.5 Q85 Q90 Q95
# 1000 49.9 95 9.321 1.225 <9.365 9.294 9.348 9.257 9.442 7.633 8.207 <8.652 8.997 <9.365 <10.42 <10.64 <10.67 <10.94 <11.31
我想 运行 bootstrap (k=10,000) 和 Kaplan-Meier 使用 lapply 或对三个不同数据框的数字列进行计算当函数嵌套时应用。
我定义了三个函数作为自举命令中的参数。 一个函数 returns 一组预定义的分位数,另一个是中位数,第三个是中位数的 95% 置信区间。引导完全失败。 bootstrapping 读取的错误是 "x[ "Result", drop = FALSE] 中的错误:维数不正确"
KM 仅针对列表中的第一个数据帧 (df) 完成。
我正在使用 boot 和 NADA2 库进行计算。
下面是数据、函数和命令的 REPREX:
library(boot)
library(wakefield)
#Generate three random datasets as data frames for example
#df
Result <- rnorm(1000,10,1)
Cens <- r_sample_logical(1000, name = "Cens")
df <- data.frame(Result,Cens)
#df1
Result <- rnorm(1000,10,1)
Cens <- r_sample_logical(1000, name = "Cens")
df1 <- data.frame(Result,Cens)
#df2
Result <- rnorm(1000,10,1)
Cens <- r_sample_logical(1000, name = "Cens")
df2 <- data.frame(Result,Cens)
x <- c(df, df1, df2)
#I know I can use built-in R base functions in the following way:
lapply(x, quantile)
sapply(x, quantile)
#How do I use lapply and sapply when funtions are nested like in the example below?
#Bootstrap
medianfun <- function(x, i){
d <- x[i, ]
return(median(d))
}
meanfun <- function(x, i){
d <- x[i, ]
return(mean(d))
}
quantfun <- function(x, i){
d <- x[i, ]
return(quantile(d, c(0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,0.95)))
}
set.seed(1234)
bo.quant<- boot(x[, "Result", drop = FALSE], statistic=quantfun, R=10000)
bo.quant
bo.median <- boot(x[, "Result", drop = FALSE], statistic=medianfun, R=10000)
bo.median
bo.95ci.df <- boot.ci(bo.median, conf=0.95, type="bca")
bo.95ci.df
#Kaplan Meier
library(NADA2)
tddt.km = cfit(x$Result,x$Censored, qtls = c(.1,.2,.3,.4,.5,.8,.845,.85,.90,.95))
tddt.km
首先你应该使用 list
而不是 c
并给数据框起这样的名字:
x <- list(df=df, df1=df1, df2=df2)
然后调整您的函数以适应列表:
#Bootstrap
medianfun <- function(x, i) {
d <- x[i]
return(median(d))
}
meanfun <- function(x, i) {
d <- x[i]
return(mean(d))
}
quantfun <- function(x, i) {
d <- x[i]
return(quantile(d, c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95)))
}
这使您可以使用 lapply
在列表中获取结果:
set.seed(1234)
R <- 199
bo.quant <- lapply(x, function(z) boot(z$Result, statistic=quantfun, R=R))
bo.quant$df ## access list `df`. `df1` and `df2` accordingly
# ORDINARY NONPARAMETRIC BOOTSTRAP
#
#
# Call:
# boot(data = z$Result, statistic = quantfun, R = R)
#
#
# Bootstrap Statistics :
# original bias std. error
# t1* 8.710887 -1.294426e-02 0.04987613
# t2* 9.188977 -2.396393e-02 0.06190039
# t3* 9.454297 8.847132e-03 0.03356391
# t4* 9.717689 9.789525e-03 0.04214934
# t5* 9.998288 -7.193749e-04 0.04270117
# t6* 10.259694 -1.568850e-04 0.03849390
# t7* 10.558218 4.143536e-03 0.05029767
# t8* 10.876818 7.381331e-03 0.04242551
# t9* 11.297995 -2.533015e-03 0.05898180
# t10* 11.706104 7.215891e-05 0.08313804
bo.median <- lapply(x, function(z) boot(z$Result, statistic=medianfun, R=R))
bo.median$df
bo.95ci.df <- boot.ci(bo.median, conf=0.95, type="bca")
bo.95ci.df
# ORDINARY NONPARAMETRIC BOOTSTRAP
#
#
# Call:
# boot(data = z$Result, statistic = medianfun, R = R)
#
#
# Bootstrap Statistics :
# original bias std. error
# t1* 9.998288 -0.005605121 0.04629431
#Kaplan Meier
library(NADA2)
tddt.km <- lapply(x, function(z)
cfit(z$Result, z$Cens, qtls=c(.1, .2, .3, .4, .5, .8, .845, .85, .90, .95),
Cdf=FALSE, printstat=FALSE))
tddt.km$df
# N PctND Conf KMmean KMsd KMmedian LCLmean UCLmean LCLmedian UCLmedian Q10 Q20 Q30 Q40 Q50 Q80 Q84.5 Q85 Q90 Q95
# 1000 49.9 95 9.321 1.225 <9.365 9.294 9.348 9.257 9.442 7.633 8.207 <8.652 8.997 <9.365 <10.42 <10.64 <10.67 <10.94 <11.31